<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <!-- Rendering mode for legacy IE, responsive viewport (zoom allowed up to 2x) and browser UI colour. -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
  <meta name="theme-color" content="#222">

  <!-- Cache-Control directives emitted by the theme: no-transform and no-siteapp. -->
  <meta http-equiv="Cache-Control" content="no-transform">
  <meta http-equiv="Cache-Control" content="no-siteapp">






















<!-- Stylesheets: icon font first, then the theme's main stylesheet. -->
<link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" type="text/css">
<link rel="stylesheet" href="/css/main.css?v=6.1.0" type="text/css">

<!-- Touch icon, favicons (32px and 16px) and the mask icon. -->
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=6.1.0">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=6.1.0">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=6.1.0">
<link rel="mask-icon" href="/images/logo.svg?v=6.1.0" color="#222">









<script type="text/javascript" id="hexo.configurations">
  // Reuse an existing global NexT object if one is already defined, otherwise start empty.
  var NexT = window.NexT || {};

  // Site-wide settings exposed as a global (presumably consumed by the theme's scripts).
  var CONFIG = {
    root: '/',
    scheme: 'Gemini',
    version: '6.1.0',
    sidebar: {
      position: 'left',
      display: 'post',
      offset: 12,
      b2t: false,
      scrollpercent: false,
      onmobile: false
    },
    fancybox: false,
    fastclick: false,
    lazyload: false,
    tabs: true,
    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: 'fadeIn',
        post_header: 'slideDownIn',
        post_body: 'slideDownIn',
        coll_header: 'slideLeftIn',
        sidebar: 'slideUpIn'
      }
    },
    // Algolia credentials are empty strings in this build.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: { per_page: 10 },
      labels: {
        input_placeholder: "Search for Posts",
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: "${hits} results found in ${time} ms"
      }
    }
  };
</script>


  




  <!-- Search-engine description and keywords. -->
  <meta name="description" content="此为龙果学院课程笔记，记录以供以后翻看 Redis高可用集群实战如何做到99.99%高可用性 什么叫99.99%的高可用性？  在365天 * 99.99%的时间内，你的系统都是可以对外提供服务的，那就是高可用性，99.99%。  redis不可用是什么？单实例不可用？主从架构不可用？不可用的后果是什么？  如果是master进程被杀了，或者系统宕机了，那就无法提供服务了。但是如果是集群中某一个s">
<meta name="keywords" content="redis">
<!-- Open Graph metadata. og:url matches the rel="canonical" URL declared below, so shares
     and the canonical link identify the same resource (it previously ended in /index.html). -->
<meta property="og:type" content="article">
<meta property="og:title" content="高可用缓存架构实战3-Redis高可用集群实战">
<meta property="og:url" content="http://www.saily.top/2018/02/12/cache03/">
<meta property="og:site_name" content="帆的博客">
<meta property="og:description" content="此为龙果学院课程笔记，记录以供以后翻看 Redis高可用集群实战如何做到99.99%高可用性 什么叫99.99%的高可用性？  在365天 * 99.99%的时间内，你的系统都是可以对外提供服务的，那就是高可用性，99.99%。  redis不可用是什么？单实例不可用？主从架构不可用？不可用的后果是什么？  如果是master进程被杀了，或者系统宕机了，那就无法提供服务了。但是如果是集群中某一个s">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="https://images2015.cnblogs.com/blog/27612/201707/27612-20170701230311383-1540605556.png">
<meta property="og:image" content="http://www.saily.top/img/cache/最老土的hash算法以及弊端.png">
<meta property="og:image" content="http://www.saily.top/img/cache/一致性hash算法的讲解和优点.png">
<meta property="og:image" content="http://www.saily.top/img/cache/一致性hash算法的虚拟节点实现负载均衡.png">
<meta property="og:image" content="http://www.saily.top/img/cache/redis%20cluster%20hash%20slot算法.png">
<meta property="og:updated_time" content="2018-08-25T07:18:16.741Z">
<!-- Twitter card metadata. -->
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="高可用缓存架构实战3-Redis高可用集群实战">
<meta name="twitter:description" content="此为龙果学院课程笔记，记录以供以后翻看 Redis高可用集群实战如何做到99.99%高可用性 什么叫99.99%的高可用性？  在365天 * 99.99%的时间内，你的系统都是可以对外提供服务的，那就是高可用性，99.99%。  redis不可用是什么？单实例不可用？主从架构不可用？不可用的后果是什么？  如果是master进程被杀了，或者系统宕机了，那就无法提供服务了。但是如果是集群中某一个s">
<meta name="twitter:image" content="https://images2015.cnblogs.com/blog/27612/201707/27612-20170701230311383-1540605556.png">

  <!-- Atom feed for the site. -->
  <link rel="alternate" href="/atom.xml" title="帆的博客" type="application/atom+xml" />

  <!-- Canonical URL of this post. -->
  <link rel="canonical" href="http://www.saily.top/2018/02/12/cache03/"/>



<script type="text/javascript" id="page.configurations">
  // Per-page settings attached to the global CONFIG object; the sidebar override is empty here.
  CONFIG.page = { sidebar: "" };
</script>

  <title>高可用缓存架构实战3-Redis高可用集群实战 | 帆的博客</title>
  









  <noscript>
  <style type="text/css">
    /*
     * No-JavaScript fallback: resets opacity and offsets to their initial values
     * for elements under .use-motion (presumably hidden or shifted by the theme's
     * motion styles until a script animates them in).
     */
    .use-motion .motion-element,
    .use-motion .brand,
    .use-motion .menu-item,
    .sidebar-inner,
    .use-motion .post-block,
    .use-motion .pagination,
    .use-motion .comments,
    .use-motion .post-header,
    .use-motion .post-body,
    .use-motion .collection-title { opacity: initial; }

    .use-motion .logo,
    .use-motion .site-title,
    .use-motion .site-subtitle {
      opacity: initial;
      top: initial;
    }

    /*
     * Written as flat descendant selectors: the previous nested form
     * (.use-motion { .logo-line-before i { ... } }) is preprocessor-style syntax
     * that browsers without native CSS nesting discard entirely.
     */
    .use-motion .logo-line-before i { left: initial; }
    .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <!-- Site branding: title link back to the home page, followed by the subtitle. -->
  <div class="site-meta ">
    <div class="custom-logo-site-title">
      <a class="brand" href="/" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">帆的博客</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
    <p class="site-subtitle">扬帆起航</p>
  </div>

  <!-- Navigation toggle. The three bars are purely visual; the accessible name comes from aria-label. -->
  <div class="site-nav-toggle">
    <!-- Explicit type="button": a bare <button> defaults to type="submit". -->
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>




<nav class="site-nav">
  <!--
    Primary site menu. Every entry has a visible text label, so the Font Awesome
    icons are decorative and marked aria-hidden to keep them out of the
    accessibility tree.
  -->
  <ul id="menu" class="menu">
    <li class="menu-item menu-item-home">
      <a href="/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-home" aria-hidden="true"></i> <br>首页</a>
    </li>
    <li class="menu-item menu-item-about">
      <a href="/about/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-user" aria-hidden="true"></i> <br>关于</a>
    </li>
    <li class="menu-item menu-item-tags">
      <a href="/tags/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-tags" aria-hidden="true"></i> <br>标签</a>
    </li>
    <li class="menu-item menu-item-categories">
      <a href="/categories/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-th" aria-hidden="true"></i> <br>分类</a>
    </li>
    <li class="menu-item menu-item-archives">
      <a href="/archives/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-archive" aria-hidden="true"></i> <br>归档</a>
    </li>
    <li class="menu-item menu-item-search">
      <!--
        NOTE(review): this anchor has no real destination (javascript: href) and
        presumably opens the search popup via a script bound to .popup-trigger.
        A <button type="button"> would be the semantic choice; left unchanged here
        because the theme's CSS/JS selectors are not visible from this file.
      -->
      <a href="javascript:;" class="popup-trigger">
        <i class="menu-item-icon fa fa-search fa-fw" aria-hidden="true"></i> <br>搜索</a>
    </li>
  </ul>

  <!-- Local search popup: header (icon, close control, input) and the results container. -->
  <div class="site-search">
    <div class="popup search-popup local-search-popup">
      <div class="local-search-header clearfix">
        <span class="search-icon">
          <i class="fa fa-search" aria-hidden="true"></i>
        </span>
        <span class="popup-btn-close">
          <i class="fa fa-times-circle" aria-hidden="true"></i>
        </span>
        <div class="local-search-input-wrapper">
          <!-- A placeholder is not a label; aria-label gives the field an accessible name. -->
          <input autocomplete="off"
                 placeholder="搜索..." spellcheck="false"
                 type="text" id="local-search-input"
                 aria-label="搜索">
        </div>
      </div>
      <div id="local-search-result"></div>
    </div>
  </div>
</nav>



  



</div>
    </header>

    


    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          
            

          
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <!-- schema.org microdata for the article: page URL, author and publisher (not rendered). -->
    <link itemprop="mainEntityOfPage" href="http://www.saily.top/2018/02/12/cache03/">

    <span itemprop="author" itemscope itemtype="http://schema.org/Person" hidden>
      <meta itemprop="name" content="杨帆">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/img/photo/bug.png">
    </span>

    <span itemprop="publisher" itemscope itemtype="http://schema.org/Organization" hidden>
      <meta itemprop="name" content="帆的博客">
    </span>

    
      <!-- Post header: title, then a meta row with publish date, category and page-view counter. -->
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">高可用缓存架构实战3-Redis高可用集群实战
        </h1>

        <div class="post-meta">
          <!-- Publish date -->
          <span class="post-time">
            <span class="post-meta-item-icon">
              <i class="fa fa-calendar-o"></i>
            </span>
            <span class="post-meta-item-text">发表于</span>
            <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-02-12T17:27:23+08:00">2月 12 2018</time>
          </span>

          <!-- Category -->
          <span class="post-category">
            <span class="post-meta-divider">|</span>
            <span class="post-meta-item-icon">
              <i class="fa fa-folder-o"></i>
            </span>
            <span class="post-meta-item-text">分类于</span>
            <span itemprop="about" itemscope itemtype="http://schema.org/Thing"><a href="/categories/高可用缓存架构实战/" itemprop="url" rel="index"><span itemprop="name">高可用缓存架构实战</span></a></span>
          </span>

          <!-- Page views; the empty #busuanzi_value_page_pv span is presumably filled in by a counter script. -->
          <span class="post-meta-divider">|</span>
          <span class="post-meta-item-icon">
            <i class="fa fa-eye"></i>
            阅读次数：
            <span class="busuanzi-value" id="busuanzi_value_page_pv"></span>
          </span>
        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>此为龙果学院课程笔记，记录以供以后翻看</p>
<h1 id="Redis高可用集群实战"><a href="#Redis高可用集群实战" class="headerlink" title="Redis高可用集群实战"></a>Redis高可用集群实战</h1><h2 id="如何做到99-99-高可用性"><a href="#如何做到99-99-高可用性" class="headerlink" title="如何做到99.99%高可用性"></a>如何做到99.99%高可用性</h2><ol>
<li><p>什么叫99.99%的高可用性？</p>
<p> 在365天 * 99.99%的时间内，你的系统都是可以对外提供服务的，那就是高可用性，99.99%。</p>
</li>
<li><p>redis不可用是什么？单实例不可用？主从架构不可用？不可用的后果是什么？</p>
<p> 如果是master进程被杀了，或者系统宕机了，那就无法提供服务了。但是如果是集群中某一个slave挂掉了，没问题，还有其他的slave可以提供服务。</p>
</li>
<li><p>Redis怎么才能做到高可用？</p>
<p> 如果master挂了怎么办？Redis有个故障转移功能，如果master node故障时，自动检测，并且将某个slave node自动切换为master node，也可以叫做主备切换，这实现了redis主从架构下的高可用性，这其中会用到Redis的哨兵架构（它会去检测）。<br> 一旦master故障，在很短的时间内，就会切换到另外一个master上去，可能就几分钟，或者几秒钟是不可用的。    </p>
</li>
</ol>
<a id="more"></a>
<h2 id="Redis哨兵架构介绍"><a href="#Redis哨兵架构介绍" class="headerlink" title="Redis哨兵架构介绍"></a>Redis哨兵架构介绍</h2><p>Sentinel（哨兵）是Redis 的高可用性解决方案：由一个或多个Sentinel 实例 组成的Sentinel 系统可以监视任意多个主服务器，以及这些主服务器属下的所有从服务器，并在被监视的主服务器进入下线状态时，自动将下线主服务器属下的某个从服务器升级为新的主服务器。</p>
<p>哨兵是redis集群架构中非常重要的一个组件，主要功能如下：</p>
<ol>
<li>集群监控，负责监控redis master和slave进程是否正常工作。</li>
<li>消息通知，如果某个redis实例有故障，那么哨兵负责发送消息作为报警通知给管理员。</li>
<li>故障转移，如果master node挂掉了，会自动转移到slave node上。</li>
<li>配置中心，如果故障转移发生了，通知client客户端新的master地址。</li>
</ol>
<p>哨兵本身也是分布式的，作为一个哨兵集群去运行，互相协同工作：</p>
<ol>
<li>执行故障转移时，判断一个master node是宕机了，需要大部分的哨兵都同意才行，涉及到了分布式选举的问题。</li>
<li>即使部分哨兵节点挂掉了，哨兵集群还是能正常工作的，因为如果一个作为高可用机制重要组成部分的故障转移系统本身是单点的，那就坑爹了。</li>
</ol>
<h3 id="哨兵的核心知识"><a href="#哨兵的核心知识" class="headerlink" title="哨兵的核心知识"></a>哨兵的核心知识</h3><ol>
<li><strong>哨兵至少需要3个实例</strong>，来保证自己的健壮性</li>
<li>哨兵 + redis主从的部署架构，是不会保证数据零丢失的，只能保证redis集群的高可用性</li>
<li>对于哨兵 + redis主从这种复杂的部署架构，尽量在测试环境和生产环境，都进行充足的测试和演练</li>
</ol>
<h4 id="为什么redis哨兵集群只有2个节点无法正常工作？"><a href="#为什么redis哨兵集群只有2个节点无法正常工作？" class="headerlink" title="为什么redis哨兵集群只有2个节点无法正常工作？"></a>为什么redis哨兵集群只有2个节点无法正常工作？</h4><p>哨兵集群必须部署2个以上节点，如果哨兵集群仅仅部署了2个哨兵实例：</p>
<pre><code>+----+         +----+
| M1 |---------| R1 |
| S1 |         | S2 |
+----+         +----+
</code></pre>
<p>Configuration: quorum = 1（如果有quorum个哨兵投票选举，就认为master宕机，进行切换）</p>
<p>上图中2个哨兵，master宕机，s1和s2中只要有1个哨兵认为master宕机就可以进行切换，同时s1和s2中会选举出一个哨兵来执行故障转移。这个时候，它需要大多数(majority)哨兵都是运行的，2个哨兵的majority就是2（2的majority=2，3的majority=2，5的majority=3，4的majority=3），如果2个哨兵都运行着，就可以允许执行故障转移。但是，如果整个M1和S1运行的机器宕机了，那么哨兵只有1个了，此时就没有majority(大多数的哨兵)来允许执行故障转移，虽然另外一台机器还有一个R1，但是故障转移不会执行。</p>
<h4 id="经典的3节点哨兵集群"><a href="#经典的3节点哨兵集群" class="headerlink" title="经典的3节点哨兵集群"></a>经典的3节点哨兵集群</h4><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line">       +----+</span><br><span class="line">       | M1 |</span><br><span class="line">       | S1 |</span><br><span class="line">       +----+</span><br><span class="line">          |</span><br><span class="line">+----+    |    +----+</span><br><span class="line">| R2 |----+----| R3 |</span><br><span class="line">| S2 |         | S3 |</span><br><span class="line">+----+         +----+</span><br></pre></td></tr></table></figure>
<p>Configuration: quorum = 2，majority</p>
<p>如果M1所在机器宕机了，那么三个哨兵还剩下2个，S2和S3可以一致认为master宕机，然后选举出一个来执行故障转移。</p>
<h3 id="数据丢失问题"><a href="#数据丢失问题" class="headerlink" title="数据丢失问题"></a>数据丢失问题</h3><p>主备切换的过程，可能会导致数据丢失</p>
<ol>
<li><p>异步复制导致的数据丢失</p>
<p> 因为master -&gt; slave的复制是异步的，所以可能有部分数据还没复制到slave，master就宕机了，此时这些部分数据就丢失了。</p>
</li>
<li><p>脑裂导致的数据丢失</p>
<p> <img src="https://images2015.cnblogs.com/blog/27612/201707/27612-20170701230311383-1540605556.png" alt=""><br> 脑裂，也就是说，某个master所在机器突然脱离了正常的网络，跟其他slave机器不能连接，但是实际上master还运行着，此时哨兵可能就会认为master宕机了，然后开启选举，将其他slave切换成了master，这个时候，集群里就会有两个master，也就是所谓的脑裂。此时虽然某个slave被切换成了master，但是可能client还没来得及切换到新的master，还继续写向旧master的数据可能也丢失了，因此旧master再次恢复的时候，会被作为一个slave挂到新的master上去，自己的数据会清空，重新从新的master复制数据。</p>
</li>
</ol>
<h4 id="解决异步复制和脑裂导致的数据丢失"><a href="#解决异步复制和脑裂导致的数据丢失" class="headerlink" title="解决异步复制和脑裂导致的数据丢失"></a>解决异步复制和脑裂导致的数据丢失</h4><p>有2个参数：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">min-slaves-to-write 1</span><br><span class="line">min-slaves-max-lag 10</span><br></pre></td></tr></table></figure>
<p>要求至少有1个slave，数据复制和同步的延迟不能超过10秒，如果说一旦所有的slave，数据复制和同步的延迟都超过了10秒钟，那么这个时候，master就不会再接收任何请求了，上面两个配置可以减少异步复制和脑裂导致的数据丢失。</p>
<ol>
<li><p>减少异步复制的数据丢失</p>
<p> 有了min-slaves-max-lag这个配置，就可以确保一旦slave复制数据和ack延时太长，就认为可能master宕机后损失的数据太多了，那么就拒绝写请求，这样可以把master宕机时由于部分数据未同步到slave导致的数据丢失降低到可控范围内。</p>
</li>
</ol>
<ol start="2">
<li><p>减少脑裂的数据丢失</p>
<p> 如果一个master出现了脑裂，跟其他slave丢了连接，上面的配置就确保了如果跟任何一个slave丢了连接，在10秒后发现没有slave给自己ack，那么就拒绝客户端新的写请求，因此在脑裂场景下，最多就丢失10秒的数据。</p>
</li>
</ol>
<h2 id="哨兵原理详解"><a href="#哨兵原理详解" class="headerlink" title="哨兵原理详解"></a>哨兵原理详解</h2><h3 id="sdown和odown转换机制"><a href="#sdown和odown转换机制" class="headerlink" title="sdown和odown转换机制"></a>sdown和odown转换机制</h3><p>sdown和odown是两种失败状态。sdown是主观宕机，就是一个哨兵如果自己觉得一个master宕机了，那么就是主观宕机。</p>
<p>odown是客观宕机，如果<code>quorum</code>数量的哨兵都觉得一个master宕机了，那么就是客观宕机。</p>
<p>sdown达成的条件很简单，如果一个哨兵ping一个master，超过了<code>down-after-milliseconds</code>参数指定的毫秒数之后仍没有收到有效回复，就主观认为master宕机。</p>
<p>sdown到odown转换的条件很简单，如果一个哨兵在指定时间内，收到了quorum指定数量的其他哨兵也认为那个master是sdown了，那么就认为是odown了，客观认为master宕机。</p>
<h3 id="哨兵集群的自动发现机制"><a href="#哨兵集群的自动发现机制" class="headerlink" title="哨兵集群的自动发现机制"></a>哨兵集群的自动发现机制</h3><p>哨兵互相之间的发现，是通过redis的<code>pub/sub</code>系统实现的，每个哨兵都会往<code>__sentinel__:hello</code>这个channel里发送一个消息，这时候所有其他哨兵都可以消费到这个消息，并感知到其他的哨兵的存在，每隔两秒钟，每个哨兵都会往自己监控的某个master+slaves对应的<code>__sentinel__:hello</code> channel里发送一个消息，内容是自己的host、ip和runid还有对这个master的监控配置。</p>
<p>每个哨兵也会去监听自己监控的每个master+slaves对应的<code>__sentinel__:hello</code>channel，然后去感知到同样在监听这个master+slaves的其他哨兵的存在。每个哨兵还会跟其他哨兵交换对master的监控配置，互相进行监控配置的同步。</p>
<h3 id="slave配置的自动纠正"><a href="#slave配置的自动纠正" class="headerlink" title="slave配置的自动纠正"></a>slave配置的自动纠正</h3><p>哨兵会负责自动纠正slave的一些配置，比如slave如果要成为潜在的master候选人，哨兵会确保slave在复制现有master的数据; 如果slave连接到了一个错误的master上，比如故障转移之后，哨兵会确保它们连接到正确的master上。</p>
<h3 id="slave-gt-master选举算法"><a href="#slave-gt-master选举算法" class="headerlink" title="slave-&gt;master选举算法"></a>slave-&gt;master选举算法</h3><p>如果一个master被认为odown了，而且majority（大多数）哨兵都允许了主备切换，那么某个哨兵就会执行主备切换操作，此时首先要选举一个slave。</p>
<p>选举会考虑slave的一些因素：</p>
<ol>
<li>跟master断开连接的时长</li>
<li>slave优先级</li>
<li>复制offset</li>
<li>run id</li>
</ol>
<p>如果一个slave跟master断开连接已经超过了<code>down-after-milliseconds</code>的10倍，外加master宕机的时长，那么slave就被认为不适合选举为master。</p>
<p>接下来会对slave进行排序：</p>
<ol>
<li>按照slave优先级进行排序，<code>slave priority</code>越低，优先级就越高。</li>
<li>如果<code>slave priority</code>相同，那么看<code>replica offset</code>，哪个slave复制了越多的数据，offset越靠后，优先级就越高。</li>
<li>如果上面两个条件都相同，那么选择一个run id比较小的那个slave。</li>
</ol>
<h3 id="quorum和majority"><a href="#quorum和majority" class="headerlink" title="quorum和majority"></a>quorum和majority</h3><p>每次一个哨兵要做主备切换，首先需要<code>quorum</code>数量的哨兵认为odown，然后选举出一个哨兵来做切换，这个哨兵还得得到majority哨兵的授权，才能正式执行切换。</p>
<p>如果quorum &lt; majority，比如5个哨兵，majority就是3，quorum设置为2，那么就3个哨兵授权就可以执行切换。</p>
<p>但是如果quorum &gt;= majority，那么必须quorum数量的哨兵都授权，比如5个哨兵，quorum是5，那么必须5个哨兵都同意授权，才能执行切换。</p>
<h3 id="configuration-epoch"><a href="#configuration-epoch" class="headerlink" title="configuration epoch"></a>configuration epoch</h3><p>哨兵会对一套redis master+slave进行监控，有相应的监控的配置，要执行切换的那个哨兵，会从要切换到的新master（slave-&gt;master）节点那里得到一个<code>configuration epoch</code>，这就是一个version号，每次切换的version号都必须是唯一的。</p>
<p>如果第一个选举出的哨兵切换失败了，那么其他哨兵，会等待failover-timeout时间，然后接替继续执行切换，此时会重新获取一个新的<code>configuration epoch</code>，作为新的version号。</p>
<h3 id="configuraiton传播"><a href="#configuraiton传播" class="headerlink" title="configuration传播"></a>configuration传播</h3><p>哨兵完成切换之后，会在自己本地更新生成最新的master配置，然后同步给其他的哨兵，就是通过之前说的pub/sub消息机制，这里之前的version号就很重要了，因为各种消息都是通过一个channel去发布和监听的，所以一个哨兵完成一次新的切换之后，新的master配置是跟着新的version号的，其他的哨兵都是根据版本号的大小来更新自己的master配置。</p>
<h2 id="哨兵集群实战"><a href="#哨兵集群实战" class="headerlink" title="哨兵集群实战"></a>哨兵集群实战</h2><p>动手实操，练习如何操作部署哨兵集群，如何基于哨兵进行故障转移，还有一些企业级的配置方案。</p>
<h3 id="哨兵的配置文件"><a href="#哨兵的配置文件" class="headerlink" title="哨兵的配置文件"></a>哨兵的配置文件</h3><p>每一个哨兵都可以去监控多个master-slaves的主从架构，相同的一套哨兵集群，可以去监控不同的多个redis主从集群，只需要给每个redis主从集群分配一个逻辑的名称。</p>
<p><code>sentinel.conf</code></p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 指定对一个master的监控，给监控的master指定的一个名称，后面分布式集群架构里会讲到，可以配置多个master做数据拆分。</span></span><br><span class="line">sentinel monitor mymaster 127.0.0.1 6379 2</span><br><span class="line"><span class="comment"># 超过多少毫秒跟一个redis实例断了连接，哨兵就可能认为这个redis实例挂了</span></span><br><span class="line">sentinel down-after-milliseconds mymaster 60000</span><br><span class="line"><span class="comment"># 执行故障转移的timeout超时时长</span></span><br><span class="line">sentinel failover-timeout mymaster 180000</span><br><span class="line"><span class="comment"># 新的master切换之后，同时有多少个slave被切换到去连接新master，重新做同步，数字越低，花费的时间越多</span></span><br><span class="line">sentinel parallel-syncs mymaster 1</span><br><span class="line"><span class="comment"># 上面的三个配置，都是针对某个监控的master配置的，给其指定上面分配的名称即可</span></span><br><span class="line"></span><br><span class="line">sentinel monitor resque 192.168.1.3 6380 4</span><br><span class="line">sentinel down-after-milliseconds resque 10000</span><br><span class="line">sentinel failover-timeout resque 180000</span><br><span class="line">sentinel parallel-syncs resque 5</span><br><span class="line"></span><br><span class="line">sentinel monitor mymaster 127.0.0.1 6379</span><br></pre></td></tr></table></figure>
<p>上面这段配置，就监控了两个master node。这是最小的哨兵配置，如果发生了master-slave故障转移，或者新的哨兵进程加入哨兵集群，那么哨兵会自动更新自己的配置文件。</p>
<pre><code>sentinel monitor master-group-name hostname port quorum
</code></pre><p><strong>quorum</strong>的解释如下：</p>
<ol>
<li>至少多少个哨兵要一致同意，master进程挂掉了，或者slave进程挂掉了，或者要启动一个故障转移操作</li>
<li>quorum是用来识别故障的，真正执行故障转移的时候，还是要在哨兵集群执行选举，选举一个哨兵进程出来执行故障转移操作</li>
<li>假设有5个哨兵，quorum设置了2，那么如果5个哨兵中的2个都认为master挂掉了; 2个哨兵中的一个就会做一个选举，选举一个哨兵出来，执行故障转移; 如果5个哨兵中有3个哨兵都是运行的，那么故障转移就会被允许执行</li>
</ol>
<p>假设你的redis是1个master，4个slave，然后master宕机了，4个slave中有1个切换成了master，剩下3个slave就要挂到新的master上面去，这个时候，如果<code>parallel-syncs</code>是1，那么3个slave，一个一个地挂接到新的master上面去，1个挂接完，而且从新的master sync完数据之后，再挂接下一个。如果<code>parallel-syncs</code>是3，那么一次性就会把所有slave挂接到新的master上去。</p>
<h3 id="在eshop-cache03上再部署一个Redis"><a href="#在eshop-cache03上再部署一个Redis" class="headerlink" title="在eshop-cache03上再部署一个Redis"></a>在eshop-cache03上再部署一个Redis</h3><blockquote>
<p>eshop-cache03是我本机安装的又一台虚拟机。</p>
</blockquote>
<p>先安装好Redis，但是不用启动，接下来做哨兵的配置。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">wget http://downloads.sourceforge.net/tcl/tcl8.6.1-src.tar.gz</span><br><span class="line">tar -xzvf tcl8.6.1-src.tar.gz</span><br><span class="line">cd  /usr/local/tcl8.6.1/unix/</span><br><span class="line">./configure  </span><br><span class="line">make &amp;&amp; make install</span><br><span class="line"></span><br><span class="line">使用redis-3.2.8.tar.gz</span><br><span class="line">tar -zxvf redis-3.2.8.tar.gz</span><br><span class="line">cd redis-3.2.8</span><br><span class="line">make &amp;&amp; make test</span><br><span class="line">make install</span><br></pre></td></tr></table></figure>
<h3 id="配置哨兵"><a href="#配置哨兵" class="headerlink" title="配置哨兵"></a>配置哨兵</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">mkdir /etc/sentinel</span><br><span class="line">mkdir -p /var/sentinel/5000</span><br><span class="line">vi /etc/sentinel/5000.conf</span><br></pre></td></tr></table></figure>
<p><code>5000.conf</code></p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br></pre></td><td class="code"><pre><span class="line">port 5000</span><br><span class="line"><span class="built_in">bind</span> 192.168.2.201</span><br><span class="line">dir /var/sentinel/5000</span><br><span class="line">sentinel monitor mymaster 192.168.2.201 6379 2</span><br><span class="line">sentinel down-after-milliseconds mymaster 30000</span><br><span class="line">sentinel failover-timeout mymaster 60000</span><br><span class="line">sentinel parallel-syncs mymaster 1</span><br><span class="line"></span><br><span class="line">port 5000</span><br><span class="line"><span class="built_in">bind</span> 192.168.2.202</span><br><span class="line">dir /var/sentinel/5000</span><br><span class="line">sentinel monitor mymaster 192.168.2.201 6379 2</span><br><span class="line">sentinel down-after-milliseconds mymaster 30000</span><br><span class="line">sentinel failover-timeout mymaster 60000</span><br><span class="line">sentinel parallel-syncs mymaster 1</span><br><span class="line"></span><br><span class="line">port 5000</span><br><span class="line"><span class="built_in">bind</span> 192.168.2.203</span><br><span class="line">dir /var/sentinel/5000</span><br><span class="line">sentinel monitor mymaster 192.168.2.201 6379 2</span><br><span class="line">sentinel down-after-milliseconds mymaster 30000</span><br><span class="line">sentinel failover-timeout mymaster 60000</span><br><span class="line">sentinel parallel-syncs mymaster 1</span><br></pre></td></tr></table></figure>
<p>注意这是3段哨兵的配置，分别在我的3台虚拟机上进行配置。哨兵默认用26379端口，默认不能跟其他机器在指定端口连通，只能在本地访问，所以要改一下<code>bind</code>配置，把三台redis实例的ip都加上。</p>
<h3 id="启动哨兵进程"><a href="#启动哨兵进程" class="headerlink" title="启动哨兵进程"></a>启动哨兵进程</h3><p>在eshop-cache01、eshop-cache02、eshop-cache03三台机器上，分别启动三个哨兵进程，组成一个集群，观察一下日志的输出。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">redis-sentinel /etc/sentinel/5000.conf</span><br><span class="line">redis-server /etc/sentinel/5000.conf --sentinel</span><br></pre></td></tr></table></figure>
<p>日志输出：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">1318:X 16 Feb 18:59:15.097 # +monitor master mymaster 192.168.2.201 6379 quorum 2</span><br><span class="line">1318:X 16 Feb 18:59:15.099 * +slave slave 192.168.2.202:6379 192.168.2.202 6379 @ mymaster 192.168.2.201 6379</span><br><span class="line">1318:X 16 Feb 18:59:15.177 * +sentinel sentinel 6f6009aac859757a296467f11f68af7284e4c9ff 192.168.2.202 5000 @ mymaster 192.168.2.201 6379</span><br><span class="line">1318:X 16 Feb 18:59:16.861 * +sentinel sentinel 4fbf75c6fcbfdd09fe8460b6e12006561567f24d 192.168.2.203 5000 @ mymaster 192.168.2.201 6379</span><br></pre></td></tr></table></figure>
<p>日志里会显示出来，每个哨兵都能去监控到对应的redis master，并能够自动发现对应的slave。</p>
<p>哨兵之间，互相会自动进行发现，用的就是之前说的pub/sub，消息发布和订阅channel消息系统和机制。</p>
<h3 id="检查哨兵状态"><a href="#检查哨兵状态" class="headerlink" title="检查哨兵状态"></a>检查哨兵状态</h3><p>redis-cli -h 192.168.2.201 -p 5000</p>
<p>sentinel master mymaster<br>SENTINEL slaves mymaster<br>SENTINEL sentinels mymaster</p>
<p>SENTINEL get-master-addr-by-name mymaster</p>
<h2 id="哨兵管理和容灾演练"><a href="#哨兵管理和容灾演练" class="headerlink" title="哨兵管理和容灾演练"></a>哨兵管理和容灾演练</h2><h3 id="哨兵节点的增加和删除"><a href="#哨兵节点的增加和删除" class="headerlink" title="哨兵节点的增加和删除"></a>哨兵节点的增加和删除</h3><p>如果是增加sentinel，会自动发现。</p>
<p>删除sentinel的步骤：</p>
<ol>
<li>停止sentinel进程</li>
<li>SENTINEL RESET *，在所有sentinel上执行，清理所有的master状态</li>
<li>SENTINEL MASTER mastername，在所有sentinel上执行，查看所有sentinel对数量是否达成了一致</li>
</ol>
<h3 id="slave的永久下线"><a href="#slave的永久下线" class="headerlink" title="slave的永久下线"></a>slave的永久下线</h3><p>让master摘除某个已经下线的slave：<code>SENTINEL RESET mastername</code>，在所有的哨兵上面执行。</p>
<h3 id="slave切换为Master的优先级"><a href="#slave切换为Master的优先级" class="headerlink" title="slave切换为Master的优先级"></a>slave切换为Master的优先级</h3><p>slave-&gt;master选举优先级：<code>slave-priority</code>，值越小优先级越高</p>
<h3 id="基于哨兵集群架构下的安全认证"><a href="#基于哨兵集群架构下的安全认证" class="headerlink" title="基于哨兵集群架构下的安全认证"></a>基于哨兵集群架构下的安全认证</h3><p>每个slave都有可能切换成master，所以每个实例都要配置两个指令</p>
<p>master上启用安全认证，<code>requirepass</code><br>master连接口令，<code>masterauth</code></p>
<p>sentinel配置：<code>sentinel auth-pass &lt;master-group-name&gt; &lt;pass&gt;</code></p>
<h3 id="容灾演练"><a href="#容灾演练" class="headerlink" title="容灾演练"></a>容灾演练</h3><p>通过哨兵看一下当前的master：<code>SENTINEL get-master-addr-by-name mymaster</code>。</p>
<p>把master节点kill -9掉，pid文件也删除掉。</p>
<p>日志：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">1336:X 16 Feb 22:05:18.458 # -sdown master mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:18.458 # -odown master mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:18.458 # +selected-slave slave 192.168.2.202:6379 192.168.2.202 6379 @ mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:18.458 * +failover-state-send-slaveof-noone slave 192.168.2.202:6379 192.168.2.202 6379 @ mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:18.559 * +failover-state-wait-promotion slave 192.168.2.202:6379 192.168.2.202 6379 @ mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:19.417 # +promoted-slave slave 192.168.2.202:6379 192.168.2.202 6379 @ mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:19.417 # +failover-state-reconf-slaves master mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:19.486 # +failover-end master mymaster 192.168.2.201 6379</span><br><span class="line">1336:X 16 Feb 22:05:19.486 # +switch-master mymaster 192.168.2.201 6379 192.168.2.202 6379</span><br><span class="line">1336:X 16 Feb 22:05:19.486 * +slave slave 192.168.2.201:6379 192.168.2.201 6379 @ mymaster 192.168.2.202 6379</span><br><span class="line">1336:X 16 Feb 22:05:29.593 * +convert-to-slave slave 192.168.2.201:6379 192.168.2.201 6379 @ mymaster 192.168.2.202 6379</span><br></pre></td></tr></table></figure>
<p>查看sentinel的日志，是否出现+sdown字样，识别出了master的宕机问题; 然后出现+odown字样，就是指定的quorum哨兵数量，都认为master宕机了。</p>
<ol>
<li>三个哨兵进程都认为master是sdown了</li>
<li>超过quorum指定的哨兵进程都认为sdown之后，就变为odown</li>
<li>哨兵1是被选举为要执行后续的主备切换的那个哨兵</li>
<li>哨兵1去新的master（slave）获取了一个新的config version</li>
<li>尝试执行failover</li>
<li>投票选举出一个slave去切换成master，每个哨兵都会执行一次投票</li>
<li><code>failover-state-send-slaveof-noone</code>，不让它去做任何节点的slave了; 把slave提拔成master; 旧的master认为不再是master了</li>
<li>哨兵就自动认为之前的201:6379变成了slave了，202:6379变成了master了</li>
<li>哨兵去探查了一下201:6379这个slave的状态，认为它sdown了</li>
</ol>
<p>所有哨兵选举出了一个实例，来执行主备切换操作，可以看到投票的日志<code>xxx voted for xxxx</code>。如果majority的哨兵都存活着，那么就会执行主备切换操作，刚才日志里也看到了，<code>+switch-master mymaster 192.168.2.201 6379 192.168.2.202 6379</code>。</p>
<p>再通过哨兵看一下master：SENTINEL get-master-addr-by-name mymaster</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">[root@eshop-cache01 ~]<span class="comment"># redis-cli -h 192.168.2.201 -p 5000</span></span><br><span class="line">192.168.2.201:5000&gt; SENTINEL get-master-addr-by-name mymaster</span><br><span class="line">1) <span class="string">"192.168.2.202"</span></span><br><span class="line">2) <span class="string">"6379"</span></span><br></pre></td></tr></table></figure>
<p>可以看到master已经变成<code>192.168.2.202:6379</code>了，接下来我们试试故障恢复，再将旧的master重新启动，查看是否被哨兵自动切换成slave节点。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">[root@eshop-cache01 ~]<span class="comment"># /etc/init.d/redis_6379 start</span></span><br></pre></td></tr></table></figure>
<p>重新看一下202上的<code>info replication</code>：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">192.168.2.202:6379&gt; info replication</span><br><span class="line"><span class="comment"># Replication</span></span><br><span class="line">role:master</span><br><span class="line">connected_slaves:1</span><br><span class="line">slave0:ip=192.168.2.201,port=6379,state=online,offset=219972,lag=1</span><br></pre></td></tr></table></figure>
<p>发现201它变成了一个slave了。</p>
<p>所以容灾的演练的步骤是：</p>
<ol>
<li>手动杀掉master</li>
<li>哨兵能否执行主备切换，将slave切换为master</li>
<li>哨兵完成主备切换后，新的master能否使用</li>
<li>故障恢复，将旧的master重新启动</li>
<li>哨兵能否自动将旧的master变为slave，挂接到新的master上面去，而且也是可以使用的</li>
</ol>
<h3 id="哨兵的生产环境部署"><a href="#哨兵的生产环境部署" class="headerlink" title="哨兵的生产环境部署"></a>哨兵的生产环境部署</h3><p>配置文件改成后台运行，然后把日志路径配置上。</p>
<pre><code>daemonize yes
logfile /var/log/sentinel/5000.log
mkdir -p /var/log/sentinel
</code></pre><h2 id="如何让Redis支持1T以上大数据"><a href="#如何让Redis支持1T以上大数据" class="headerlink" title="如何让Redis支持1T以上大数据"></a>如何让Redis支持1T以上大数据</h2><h3 id="单Master的redis在海量数据面前的瓶颈"><a href="#单Master的redis在海量数据面前的瓶颈" class="headerlink" title="单Master的redis在海量数据面前的瓶颈"></a>单Master的redis在海量数据面前的瓶颈</h3><p>Master节点的数据和slave节点的数据是一样的，master最大能容纳多大的数据量，那么slave也就只能容纳多大的数据量。</p>
<p>Redis的缓存清理算法，会将旧的、很少使用的数据清除出内存，保证内存中的数据只占用固定大小的空间，不可能超过master内存的物理上限。</p>
<p>但是如果要让Redis保存1T以上的数据在缓存里，供系统高性能的查询和运行，在单机Master的情况下，目前几乎是不可能达到的。</p>
<h3 id="怎么才能够突破单机瓶颈，让redis支撑海量数据？"><a href="#怎么才能够突破单机瓶颈，让redis支撑海量数据？" class="headerlink" title="怎么才能够突破单机瓶颈，让redis支撑海量数据？"></a>怎么才能够突破单机瓶颈，让redis支撑海量数据？</h3><p>如果要支撑更大数据量的缓存，那就横向扩容更多的master节点，每个master节点就能存放更多的数据了，单台服务器是32GB，30台左右就可以支撑1T的数据量了。</p>
<h3 id="Redis集群架构"><a href="#Redis集群架构" class="headerlink" title="Redis集群架构"></a>Redis集群架构</h3><p>Redis集群架构支持N个master node，每个master node都可以挂载多个slave node，依然是读写分离的架构，对于每个master来说，写就写到master，然后读就从master对应的slave去读。</p>
<p>集群高可用：因为每个master都有slave节点，那么如果master挂掉，redis cluster的机制，就会自动将某个slave切换成master。</p>
<p>redis cluster = 多master + 读写分离 + 高可用。</p>
<p>所以只需要基于redis cluster去搭建redis集群即可，<strong>不需要</strong>手工去搭建replication复制+主从架构+读写分离+哨兵集群+高可用。</p>
<h3 id="redis-cluster-vs-replication-sentinel"><a href="#redis-cluster-vs-replication-sentinel" class="headerlink" title="redis cluster vs. replication + sentinel"></a>redis cluster vs. replication + sentinel</h3><p>如果数据量很少，主要是为了承载高并发高性能的场景，比如你的缓存一般就几个G，单机足够了。</p>
<p>如果是<strong>replication</strong>架构，一个master，多个slave，需要几个slave跟要求的读吞吐量有关系，然后搭建一个sentinel集群，去保证redis主从架构的高可用性，就能满足需求了。<strong>redis cluster</strong>主要是针对<strong>海量数据+高并发+高可用</strong>的场景，如果数据量很大，那么建议就用<strong>redis cluster</strong>。</p>
<h3 id="分布式数据存储的核心算法"><a href="#分布式数据存储的核心算法" class="headerlink" title="分布式数据存储的核心算法"></a>分布式数据存储的核心算法</h3><p>随着技术的进步，算法的进阶：</p>
<pre><code>hash算法 -&gt; 一致性hash算法（memcached） -&gt; redis cluster，hash slot算法
</code></pre><p>用不同的算法，就决定了在多个master节点的时候，数据如何分布到这些节点上去。</p>
<ol>
<li><p>hash算法</p>
<p> <img src="/img/cache/最老土的hash算法以及弊端.png" alt="最老土的hash算法以及弊端"></p>
<p> 先是通过对key计算hash值，然后对节点数量（3）取模，取模结果一定是0~2之间，小于节点数量，然后根据索引去对应节点上取数据。如果某一个master宕机了，所有请求过来都会重新基于新的节点数量（2）去取模，此时所有数据都无法获取到，大量的流量会涌入到数据库中，几乎100%的缓存都可能失效了。</p>
</li>
<li><p>一致性hash算法</p>
<p> <img src="/img/cache/一致性hash算法的讲解和优点.png" alt="一致性hash算法的讲解和优点"></p>
<p> 同样是先通过对key计算hash值，然后用hash值落在圆环上的某个点，然后顺时针去寻找最近的一个节点。这个算法保证了如果某一台master宕机，只有之前那台master上的数据会受到影响，因为顺时针会找到下一个节点，还是找不到数据，此时只有1/3的数据找不到，流量会涌入到数据库中，重新查询一次。但是一致性hash算法也有一个问题，那就是缓存热点数据问题，可能集中在某个hash区间的值特别多，会导致大量数据都涌入同一个master内，造成master的热点问题，性能出现瓶颈。</p>
</li>
<li><p>优化一致性hash算法</p>
<p> <img src="/img/cache/一致性hash算法的虚拟节点实现负载均衡.png" alt="一致性hash算法的虚拟节点实现负载均衡"></p>
<p> 为了解决缓存热点数据问题，增加了虚拟节点的概念，如上图的黑色圆圈。给每个master都做了均匀分布的虚拟节点。这样的话，在每个区间内，大量的数据都会均分到不同的节点上，而不是按照顺时针的顺序去涌入同一个master内。</p>
</li>
<li><p>hash slot算法</p>
<p> <img src="/img/cache/redis cluster hash slot算法.png" alt="redis cluster hash slot算法"></p>
<p> Redis 集群有16384个哈希槽,每个key通过CRC16校验后对16384取模来决定放置哪个槽.集群的每个节点负责一部分hash槽,举个例子,比如当前集群有3个节点,那么:</p>
<pre><code>* 节点 A 包含 0 到 5500号哈希槽
* 节点 B 包含5501 到 11000 号哈希槽
* 节点 C 包含11001 到 16383号哈希槽
</code></pre><p> 这种结构很容易添加或者删除节点. 比如如果我想新添加个节点D, 我需要将节点 A, B, C中的部分槽移到D上. 如果我想移除节点A,需要将A中的槽移到B和C节点上,然后将没有任何槽的A节点从集群中移除即可. 由于从一个节点将哈希槽移动到另一个节点并不会停止服务,所以无论添加删除或者改变某个节点的哈希槽的数量都不会造成集群不可用的状态.</p>
</li>
</ol>
<h2 id="Redis-Cluster介绍"><a href="#Redis-Cluster介绍" class="headerlink" title="Redis Cluster介绍"></a>Redis Cluster介绍</h2><p>Redis 集群是一个提供在多个Redis节点间共享数据的程序集。</p>
<p> Redis 集群的优势:</p>
<ol>
<li>自动将数据进行分片，每个master上放一部分数据。</li>
<li>提供内置的高可用支持，部分master不可用时，还是可以继续工作。</li>
</ol>
<h3 id="Redis-Cluster实战部署"><a href="#Redis-Cluster实战部署" class="headerlink" title="Redis Cluster实战部署"></a>Redis Cluster实战部署</h3><p>Redis Cluster会自动去做master+slave架构的复制和读写分离，以及master+slave的高可用+主备切换，支持多个master的hash slot分布式数据存储，所以我们之前的redis主从，哨兵集群，全部都不需要了。</p>
<h4 id="Redis-Cluster的重要配置"><a href="#Redis-Cluster的重要配置" class="headerlink" title="Redis Cluster的重要配置"></a>Redis Cluster的重要配置</h4><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">cluster-enabled &lt;yes/no&gt;</span><br><span class="line"></span><br><span class="line">cluster-config-file &lt;filename&gt;：这是指定一个文件，供cluster模式下的redis实例保存集群状态，包括集群中其他机器的信息，比如节点的上线和下线，故障转移，不是我们去维护的，给它指定一个文件，让redis自己去维护。</span><br><span class="line"></span><br><span class="line">cluster-node-timeout &lt;milliseconds&gt;：节点存活超时时长，超过一定时长，认为节点宕机，master宕机的话就会触发主备切换，slave宕机就不会提供服务。</span><br></pre></td></tr></table></figure>
<h4 id="在三台机器上启动6个redis实例"><a href="#在三台机器上启动6个redis实例" class="headerlink" title="在三台机器上启动6个redis实例"></a>在三台机器上启动6个redis实例</h4><p>redis cluster集群，要求至少3个master去组成一个高可用，健壮的分布式的集群，每个master都建议至少给一个slave，所以3个master，3个slave，这是最少的要求。如果是正式环境下，建议在6台机器上去搭建，这样可以保证每个master都跟自己的slave不在同一台机器上；否则如果某台机器挂了，而master和它的slave恰好都在这台机器上，那么这组master+slave就都死了，集群也就不可用了。</p>
<p>我的虚拟机为了方便测试，使用3台机器去搭建6个redis实例的redis cluster。</p>
<p>先创建目录：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">mkdir -p /etc/redis-cluster</span><br><span class="line">mkdir -p /var/<span class="built_in">log</span>/redis</span><br></pre></td></tr></table></figure>
<p>201：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">mkdir -p /var/redis/7001</span><br><span class="line">mkdir -p /var/redis/7002</span><br></pre></td></tr></table></figure>
<p>202：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">mkdir -p /var/redis/7003</span><br><span class="line">mkdir -p /var/redis/7004</span><br></pre></td></tr></table></figure>
<p>203：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">mkdir -p /var/redis/7005</span><br><span class="line">mkdir -p /var/redis/7006</span><br></pre></td></tr></table></figure>
<p>写六份配置文件分别对应7001~7006，/etc/redis/7001.conf，每台机器上2个实例：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line">port 7001</span><br><span class="line">cluster-enabled yes</span><br><span class="line">cluster-config-file /etc/redis-cluster/node-7001.conf</span><br><span class="line">cluster-node-timeout 15000</span><br><span class="line">daemonize	yes							</span><br><span class="line">pidfile		/var/run/redis_7001.pid 						</span><br><span class="line">dir 		/var/redis/7001		</span><br><span class="line">logfile /var/<span class="built_in">log</span>/redis/7001.log</span><br><span class="line"><span class="built_in">bind</span> 192.168.2.201</span><br><span class="line">appendonly yes</span><br></pre></td></tr></table></figure>
<p>将上面的配置文件，在/etc/redis下放6个，分别为: 7001.conf，7002.conf，7003.conf，7004.conf，7005.conf，7006.conf，至少要用3个master节点启动，每个master加一个slave节点，先选择6个节点，启动6个实例。</p>
<h4 id="准备生产环境的启动脚本"><a href="#准备生产环境的启动脚本" class="headerlink" title="准备生产环境的启动脚本"></a>准备生产环境的启动脚本</h4><p>在/etc/init.d下，放6个启动脚本，分别为: <code>redis_7001, redis_7002, redis_7003, redis_7004, redis_7005, redis_7006</code>。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="built_in">cd</span> /etc/init.d/</span><br><span class="line">cp redis_6379 redis_7001</span><br><span class="line">vi redis_7001</span><br></pre></td></tr></table></figure>
<p>将<code>REDISPORT</code>修改为7001~7006对应的端口号。</p>
<p>检查一下3台机器上的配置文件，目录是否都已经准备好，然后分别在3台机器上，启动6个redis实例。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">[root@eshop-cache01 redis]<span class="comment"># /etc/init.d/redis_7001 start</span></span><br><span class="line">[root@eshop-cache01 redis]<span class="comment"># /etc/init.d/redis_7002 start</span></span><br><span class="line">[root@eshop-cache02 init.d]<span class="comment"># /etc/init.d/redis_7003 start</span></span><br><span class="line">[root@eshop-cache02 init.d]<span class="comment"># /etc/init.d/redis_7004 start</span></span><br><span class="line">[root@eshop-cache03 init.d]<span class="comment"># /etc/init.d/redis_7005 start</span></span><br><span class="line">[root@eshop-cache03 init.d]<span class="comment"># /etc/init.d/redis_7006 start</span></span><br></pre></td></tr></table></figure>
<h4 id="创建Redis集群"><a href="#创建Redis集群" class="headerlink" title="创建Redis集群"></a>创建Redis集群</h4><p>创建集群的工具是用的<code>redis-trib</code>，它是用ruby写的，所以我们得先安装ruby环境。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">yum install -y gcc* openssl* wget</span><br><span class="line">wget https://cache.ruby-lang.org/pub/ruby/2.3/ruby-2.3.1.tar.gz</span><br><span class="line">tar -zxvf ruby-2.3.1.tar.gz</span><br><span class="line">cd ruby-2.3.1</span><br><span class="line">./configure --prefix=/usr/local/ruby</span><br><span class="line">make &amp;&amp; make install</span><br><span class="line">ln -sf /usr/local/ruby/bin/* /usr/bin/</span><br><span class="line"></span><br><span class="line">wget http://rubygems.org/downloads/redis-3.3.0.gem</span><br><span class="line">gem install -l ./redis-3.3.0.gem</span><br><span class="line">gem list --check redis gem</span><br></pre></td></tr></table></figure>
<p>我在安装的时候遇到一个错误</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">gem install -l ./redis-3.3.0.gem</span><br><span class="line">ERROR:  Loading <span class="built_in">command</span>: install (LoadError)</span><br><span class="line">	cannot load such file -- zlib</span><br><span class="line">ERROR:  While executing gem ... (NoMethodError)</span><br><span class="line">    undefined method `invoke_with_build_args<span class="string">' for nil:NilClass</span></span><br></pre></td></tr></table></figure>
<p>解决方法：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">yum install zlib-devel</span><br><span class="line"><span class="built_in">cd</span> ruby-2.3.1/ext/zlib  </span><br><span class="line">ruby ./extconf.rb  </span><br><span class="line">make &amp;&amp; make install</span><br></pre></td></tr></table></figure>
<p>然后安装：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">gem install -l ./redis-3.3.0.gem</span><br><span class="line">cp /usr/local/redis-4.0.8/src/redis-trib.rb /usr/local/bin</span><br><span class="line">redis-trib.rb create --replicas 1 192.168.2.201:7001 192.168.2.201:7002 192.168.2.202:7003 192.168.2.202:7004 192.168.2.203:7005 192.168.2.203:7006</span><br></pre></td></tr></table></figure>
<p><code>--replicas</code>: 每个master有几个slave</p>
<p>6个实例，3个master，3个slave，尽量让master和它自己的slave不在同一台机器上。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br></pre></td><td class="code"><pre><span class="line">[root@eshop-cache01 <span class="built_in">local</span>]<span class="comment"># redis-trib.rb create --replicas 1 192.168.2.201:7001 192.168.2.201:7002 192.168.2.202:7003 192.168.2.202:7004 192.168.2.203:7005 192.168.2.203:7006</span></span><br><span class="line">&gt;&gt;&gt; Creating 
cluster</span><br><span class="line">&gt;&gt;&gt; Performing <span class="built_in">hash</span> slots allocation on 6 nodes...</span><br><span class="line">Using 3 masters:</span><br><span class="line">192.168.2.201:7001</span><br><span class="line">192.168.2.202:7003</span><br><span class="line">192.168.2.203:7005</span><br><span class="line">Adding replica 192.168.2.202:7004 to 192.168.2.201:7001</span><br><span class="line">Adding replica 192.168.2.203:7006 to 192.168.2.202:7003</span><br><span class="line">Adding replica 192.168.2.201:7002 to 192.168.2.203:7005</span><br><span class="line">M: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">   slots:0-5460 (5461 slots) master</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:5461-10922 (5462 slots) master</span><br><span class="line">S: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   replicates 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">   slots:10923-16383 (5461 slots) master</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">Can I <span class="built_in">set</span> the above configuration? 
(<span class="built_in">type</span> <span class="string">'yes'</span> to accept): yes</span><br><span class="line">&gt;&gt;&gt; Nodes configuration updated</span><br><span class="line">&gt;&gt;&gt; Assign a different config epoch to each node</span><br><span class="line">&gt;&gt;&gt; Sending CLUSTER MEET messages to join the cluster</span><br><span class="line">Waiting <span class="keyword">for</span> the cluster to join......</span><br><span class="line">&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7001)</span><br><span class="line">M: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">   slots:0-5460 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">   slots:10923-16383 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:5461-10922 (5462 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">S: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">[OK] All nodes agree about slots configuration.</span><br><span class="line">&gt;&gt;&gt; Check 
<span class="keyword">for</span> open slots...</span><br><span class="line">&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">[OK] All 16384 slots covered.</span><br></pre></td></tr></table></figure>
<p>可以检查一下集群的状态：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb check 192.168.2.201:7001</span><br></pre></td></tr></table></figure>
<h4 id="Redis-Cluster测试"><a href="#Redis-Cluster测试" class="headerlink" title="Redis Cluster测试"></a>Redis Cluster测试</h4><p>接下来对刚才搭建的集群做一些测试，<strong>Redis Cluster</strong>提供了多个master，数据可以分布式存储在多个master上; 每个master都带着slave，自动就做读写分离; 某个master如果故障，那么就会自动将slave切换成master，从而达到高可用。</p>
<h5 id="实验多master写入-gt-海量数据的分布式存储"><a href="#实验多master写入-gt-海量数据的分布式存储" class="headerlink" title="实验多master写入 -&gt; 海量数据的分布式存储"></a>实验多master写入 -&gt; 海量数据的分布式存储</h5><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">[root@eshop-cache01 <span class="built_in">local</span>]<span class="comment"># redis-cli -h 192.168.2.201 -p 7001</span></span><br><span class="line">192.168.2.201:7001&gt; <span class="built_in">set</span> mykey1 v1</span><br><span class="line">OK</span><br><span class="line">192.168.2.201:7001&gt; <span class="built_in">set</span> mykey2 v2</span><br><span class="line">(error) MOVED 14119 192.168.2.203:7005</span><br><span class="line">192.168.2.201:7001&gt; <span class="built_in">set</span> mykey3 v3</span><br><span class="line">(error) MOVED 9990 192.168.2.202:7003</span><br></pre></td></tr></table></figure>
<p>我们在redis cluster写入数据的时候，其实是可以将请求发送到任意一个master上去执行的。但是，每个master都会计算这个key对应的CRC16值，然后对16384个<strong>hash slot</strong>取模，找到key对应的<strong>hash slot</strong>，找到hash slot对应的master。如果对应的master就在自己本地的话，set mykey1 v1，mykey1这个key对应的hashslot就在自己本地，那么自己就处理掉了。但是如果计算出来的hashslot在其他master上，那么就会给客户端返回一个moved error，告诉你，你得到哪个master上去执行这条写入的命令。<strong>什么叫做多master的写入，就是每条数据只能存在于一个master上，不同的master负责存储不同的数据，分布式的数据存储。100w条数据，5个master，每个master就负责存储20w条数据，分布式数据存储。</strong></p>
<p>所以我们需要去7005和7003实例上执行后面2条语句。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">redis-cli -h 192.168.2.202 -p 7003</span><br><span class="line">192.168.2.202:7003&gt; <span class="built_in">set</span> mykey3 v3</span><br><span class="line">OK</span><br><span class="line">redis-cli -h 192.168.2.203 -p 7005</span><br><span class="line">192.168.2.203:7005&gt; <span class="built_in">set</span> mykey2 v2</span><br><span class="line">OK</span><br></pre></td></tr></table></figure>
<h5 id="实验不同master各自的slave读取-gt-读写分离"><a href="#实验不同master各自的slave读取-gt-读写分离" class="headerlink" title="实验不同master各自的slave读取 -&gt; 读写分离"></a>实验不同master各自的slave读取 -&gt; 读写分离</h5><p>刚才是写入数据，现在我们去各自的从节点试试取数据，根据之前的日志分析，我们知道每台master的从节点信息如下：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">Adding replica 192.168.2.202:7004 to 192.168.2.201:7001</span><br><span class="line">Adding replica 192.168.2.203:7006 to 192.168.2.202:7003</span><br><span class="line">Adding replica 192.168.2.201:7002 to 192.168.2.203:7005</span><br></pre></td></tr></table></figure>
<p>试试看，发现读不到，原来在redis cluster中，如果你要在slave读取数据，那么需要先执行<code>readonly</code>指令，再<code>get mykey1</code>。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">redis-cli -h 192.168.2.202 -p 7004</span><br><span class="line">192.168.2.202:7004&gt; get mykey1</span><br><span class="line">(error) MOVED 1860 192.168.2.201:7001</span><br><span class="line">192.168.2.202:7004&gt; <span class="built_in">readonly</span></span><br><span class="line">OK</span><br><span class="line">192.168.2.202:7004&gt; get mykey1</span><br><span class="line"><span class="string">"v1"</span></span><br></pre></td></tr></table></figure>
<p>实际上Redis的客户端是可以帮我们自动路由的，只需要在连接的时候加上<code>-c</code>的参数。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">redis-cli -h 192.168.2.201 -p 7001 -c</span><br><span class="line">192.168.2.201:7001&gt; <span class="built_in">set</span> mykey2 v2</span><br><span class="line">-&gt; Redirected to slot [14119] located at 192.168.2.203:7005</span><br><span class="line">OK</span><br><span class="line">192.168.2.203:7005&gt; get mykey1</span><br><span class="line">-&gt; Redirected to slot [1860] located at 192.168.2.201:7001</span><br><span class="line"><span class="string">"v1"</span></span><br></pre></td></tr></table></figure>
<p>在实验redis cluster的读写分离的时候，我们会发现有一定的限制性，因为默认情况下，redis cluster的核心的理念，主要是用slave做高可用的，每个master挂一两个slave，主要是做数据的热备，还有master故障时的主备切换，<strong>它的侧重点在高可用，而不是读写分离。</strong></p>
<p>redis cluster默认是不支持slave节点读或者写的，跟我们手动基于<code>replication</code>搭建的主从架构不一样。想要在从节点上读取数据，必须要先执行<code>readonly</code>指令。</p>
<p>虽然Redis Cluster的主从架构出来了，但是要做读写分离，就复杂了一点，jedis客户端，对redis cluster的读写分离支持不太好。默认是读和写都到master上去执行，如果你要让最流行的<code>jedis</code>做redis cluster的读写分离的访问，那可能还得自己修改一点jedis的源码，成本比较高。要不然你就是自己基于<code>jedis</code>，封装一下，自己做一个redis cluster的读写分离的访问api。</p>
<p>核心的思路是这样：<strong><code>redis cluster</code>就没有所谓的读写分离的概念了。读写分离是为了要建立一主多从的架构，才能横向任意扩展slave node去支撑更大的读吞吐量。redis cluster的架构下，实际上本身master就是可以任意扩展的，你如果要支撑更大的读吞吐量，或者写吞吐量，或者数据量，直接对master进行横向扩展就可以了，也能实现支撑更高的读吞吐的效果。</strong></p>
<h5 id="实验自动故障切换-gt-高可用性"><a href="#实验自动故障切换-gt-高可用性" class="headerlink" title="实验自动故障切换 -&gt; 高可用性"></a>实验自动故障切换 -&gt; 高可用性</h5><p>我现在把201上的7001给杀掉，看202的7004是否会接替它的位置。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb check 192.168.2.201:7002</span><br><span class="line">&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7002)</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:5461-10922 (5462 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">M: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   slots:0-5460 (5461 slots) master</span><br><span class="line">   0 additional replica(s)</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">   slots:10923-16383 (5461 slots) master</span><br><span 
class="line">   1 additional replica(s)</span><br><span class="line">[OK] All nodes agree about slots configuration.</span><br><span class="line">&gt;&gt;&gt; Check <span class="keyword">for</span> open slots...</span><br><span class="line">&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">[OK] All 16384 slots covered.</span><br></pre></td></tr></table></figure>
<p>可以看到，<code>202:7004</code>已经变成了master，现在去7004上获取<code>mykey1</code>的值，看看是否能获取到。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">redis-cli -h 192.168.2.202 -p 7004</span><br><span class="line">192.168.2.202:7004&gt; get mykey1</span><br><span class="line"><span class="string">"v1"</span></span><br></pre></td></tr></table></figure>
<p>再试着把201:7001给重新启动，它将自动作为slave挂载到了202:7004上面去。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br></pre></td><td class="code"><pre><span class="line">/etc/init.d/redis_7001 start</span><br><span class="line">Starting Redis server...</span><br><span class="line">redis-trib.rb check 192.168.2.201:7002</span><br><span class="line">&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7002)</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:5461-10922 (5462 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">M: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   slots:0-5460 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   slots: (0 slots) 
slave</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">S: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates cc8a78087798e148b257d2ae33815a25715109e8</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">   slots:10923-16383 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">[OK] All nodes agree about slots configuration.</span><br><span class="line">&gt;&gt;&gt; Check <span class="keyword">for</span> open slots...</span><br><span class="line">&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">[OK] All 16384 slots covered.</span><br></pre></td></tr></table></figure>
<p>进一步验证，<code>slave0:ip=192.168.2.201,port=7001,state=online,offset=4565,lag=1</code>：</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line">redis-cli -h 192.168.2.202 -p 7004</span><br><span class="line">192.168.2.202:7004&gt; info replication</span><br><span class="line"><span class="comment"># Replication</span></span><br><span class="line">role:master</span><br><span class="line">connected_slaves:1</span><br><span class="line">slave0:ip=192.168.2.201,port=7001,state=online,offset=4565,lag=1</span><br><span class="line">master_replid:38c489e10e3ede8290476aefec3e0ca9822f056e</span><br><span class="line">master_replid2:6451a5d9f0c94fd5191b94898181424c39a24528</span><br><span class="line">master_repl_offset:4565</span><br><span class="line">second_repl_offset:4300</span><br><span class="line">repl_backlog_active:1</span><br><span class="line">repl_backlog_size:1048576</span><br><span class="line">repl_backlog_first_byte_offset:1</span><br><span class="line">repl_backlog_histlen:4565</span><br></pre></td></tr></table></figure>
<h4 id="Redis-Cluster水平扩容"><a href="#Redis-Cluster水平扩容" class="headerlink" title="Redis Cluster水平扩容"></a>Redis Cluster水平扩容</h4><p>之前说了不建议在Redis Cluster上做读写分离，建议直接对master进行水平扩容来横向扩展读写吞吐量，还有支撑海量数据。</p>
<p>假设redis单机，读吞吐是5w/s，写吞吐2w/s。扩展redis到5台master，读吞吐可以达到总量25w/s QPS，写可以达到10w/s QPS。扩容到5台master，能支撑的总的缓存数据量就是30G，40G，如果是100台，那就是600G，800G，甚至1T+的海量数据。</p>
<p>单机Redis的内存一般就6G、8G，如果内存太大，fork类操作的时候很耗时，会导致请求延时的问题。</p>
<p>Redis扩容方法：</p>
<ol>
<li><p>加入新master</p>
<p> 203上执行：</p>
 <figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line">mkdir -p /var/redis/7007</span><br><span class="line"><span class="built_in">cd</span> /etc/redis</span><br><span class="line">cp 7006.conf 7007.conf</span><br><span class="line">vi 7007.conf</span><br><span class="line"><span class="comment"># 改一下里面的配置</span></span><br><span class="line">port 7007</span><br><span class="line">cluster-enabled yes</span><br><span class="line">cluster-config-file /etc/redis-cluster/node-7007.conf</span><br><span class="line">cluster-node-timeout 15000</span><br><span class="line">daemonize	yes							</span><br><span class="line">pidfile		/var/run/redis_7007.pid 						</span><br><span class="line">dir 		/var/redis/7007		</span><br><span class="line">logfile /var/<span class="built_in">log</span>/redis/7007.log</span><br><span class="line"><span class="built_in">bind</span> 192.168.2.203</span><br><span class="line">appendonly yes</span><br><span class="line"></span><br><span class="line"><span class="built_in">cd</span> /etc/init.d/</span><br><span class="line">cp redis_7006 redis_7007</span><br><span class="line">vi redis_7007</span><br><span class="line"><span class="comment"># REDISPORT=7007</span></span><br><span 
class="line"></span><br><span class="line">/etc/init.d/redis_7007 start</span><br></pre></td></tr></table></figure>
</li>
</ol>
<pre><code>启动完成后，加入master，在201上执行：

<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb add-node 192.168.2.203:7007 192.168.2.201:7001</span><br><span class="line">&gt;&gt;&gt; Adding node 192.168.2.203:7007 to cluster 192.168.2.201:7001</span><br><span class="line">&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7001)</span><br><span class="line">S: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates cc8a78087798e148b257d2ae33815a25715109e8</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">M: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   slots:0-5460 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 
192.168.2.203:7005</span><br><span class="line">   slots:10923-16383 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:5461-10922 (5462 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">[OK] All nodes agree about slots configuration.</span><br><span class="line">&gt;&gt;&gt; Check <span class="keyword">for</span> open slots...</span><br><span class="line">&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">[OK] All 16384 slots covered.</span><br><span class="line">&gt;&gt;&gt; Send CLUSTER MEET to node 192.168.2.203:7007 to make it join the cluster.</span><br><span class="line">[OK] New node added correctly.</span><br></pre></td></tr></table></figure>


确认一下，发现新加入的7007的master没有被分配任何的slot，所以还需要处理：

<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br></pre></td><td class="code"><pre><span class="line">	redis-trib.rb check 192.168.2.201:7001</span><br><span class="line">	&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7001)</span><br><span class="line">	S: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">	   slots: (0 slots) slave</span><br><span class="line">	   replicates cc8a78087798e148b257d2ae33815a25715109e8</span><br><span class="line">	M: 5fe91cff7ab6c20b2e2ccc0815b0a7227119f52e 192.168.2.203:7007</span><br><span 
class="line">	   slots: (0 slots) master</span><br><span class="line">	   0 additional replica(s)</span><br><span class="line">	S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">	   slots: (0 slots) slave</span><br><span class="line">	   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">	M: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">	   slots:0-5460 (5461 slots) master</span><br><span class="line">	   1 additional replica(s)</span><br><span class="line">	M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">	   slots:10923-16383 (5461 slots) master</span><br><span class="line">	   1 additional replica(s)</span><br><span class="line">	S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">	   slots: (0 slots) slave</span><br><span class="line">	   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">	M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">	   slots:5461-10922 (5462 slots) master</span><br><span class="line">	   1 additional replica(s)</span><br><span class="line">	[OK] All nodes agree about slots configuration.</span><br><span class="line">	&gt;&gt;&gt; Check <span class="keyword">for</span> open slots...</span><br><span class="line">	&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">	[OK] All 16384 slots covered.</span><br><span class="line">	```	</span><br><span class="line"></span><br><span class="line">2. 
reshard一些数据过去</span><br><span class="line"></span><br><span class="line">	resharding的意思就是把一部分<span class="built_in">hash</span> slot从一些node上迁移到另外一些node上。</span><br><span class="line">	</span><br><span class="line">	```bash</span><br><span class="line">	redis-trib.rb reshard 192.168.2.201:7001</span><br><span class="line">	How many slots <span class="keyword">do</span> you want to move (from 1 to 16384)? 4096</span><br><span class="line">	What is the receiving node ID? 5fe91cff7ab6c20b2e2ccc0815b0a7227119f52e</span><br><span class="line">	Please enter all the <span class="built_in">source</span> node IDs.</span><br><span class="line">	  Type <span class="string">'all'</span> to use all the nodes as <span class="built_in">source</span> nodes <span class="keyword">for</span> the <span class="built_in">hash</span> slots.</span><br><span class="line">	  Type <span class="string">'done'</span> once you entered all the <span class="built_in">source</span> nodes IDs.</span><br><span class="line">	Source node <span class="comment">#1:cc8a78087798e148b257d2ae33815a25715109e8</span></span><br><span class="line">	Source node <span class="comment">#2:5183cdee2295a07af3e98226887da2a645d979d1</span></span><br><span class="line">	Source node <span class="comment">#3:a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span></span><br><span class="line">	Source node <span class="comment">#4:done</span></span><br></pre></td></tr></table></figure>


要从之前的3个master上，总共迁移4096个hashslot到新的第四个master上去。
</code></pre><ol start="3">
<li><p>添加node作为slave</p>
<p> 203执行：</p>
 <figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line">mkdir -p /var/redis/7008</span><br><span class="line"><span class="built_in">cd</span> /etc/redis</span><br><span class="line">cp 7006.conf 7008.conf</span><br><span class="line">vi 7008.conf</span><br><span class="line"><span class="comment"># 改一下里面的配置</span></span><br><span class="line">port 7008</span><br><span class="line">cluster-enabled yes</span><br><span class="line">cluster-config-file /etc/redis-cluster/node-7008.conf</span><br><span class="line">cluster-node-timeout 15000</span><br><span class="line">daemonize	yes							</span><br><span class="line">pidfile		/var/run/redis_7008.pid 						</span><br><span class="line">dir 		/var/redis/7008		</span><br><span class="line">logfile /var/<span class="built_in">log</span>/redis/7008.log</span><br><span class="line"><span class="built_in">bind</span> 192.168.2.203</span><br><span class="line">appendonly yes</span><br><span class="line"></span><br><span class="line"><span class="built_in">cd</span> /etc/init.d/</span><br><span class="line">cp redis_7006 redis_7008</span><br><span class="line">vi redis_7008</span><br><span class="line"><span class="comment"># REDISPORT=7008</span></span><br><span 
class="line"></span><br><span class="line">/etc/init.d/redis_7008 start</span><br></pre></td></tr></table></figure>
</li>
</ol>
<pre><code>201执行，将新的节点挂载到7004`cc8a78087798e148b257d2ae33815a25715109e8 `上面去：

<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb add-node --slave --master-id cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.203:7008 192.168.2.201:7001</span><br></pre></td></tr></table></figure>
</code></pre><ol start="4">
<li><p>删除node</p>
<p> 先用resharding将数据都迁移到其他节点，确保node为空之后，才能执行remove操作，之前7007上是4096个slot，所以要移动3次，分别是移动1365个slot到7003，1365个slot到7004，1366个slot到7005上。</p>
 <figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb reshard 192.168.2.201:7001</span><br><span class="line">redis-trib.rb del-node 192.168.2.201:7001 5fe91cff7ab6c20b2e2ccc0815b0a7227119f52e</span><br><span class="line">&gt;&gt;&gt; Removing node 5fe91cff7ab6c20b2e2ccc0815b0a7227119f52e from cluster 192.168.2.201:7001</span><br><span class="line">&gt;&gt;&gt; Sending CLUSTER FORGET messages to the cluster...</span><br><span class="line">&gt;&gt;&gt; SHUTDOWN the node.</span><br></pre></td></tr></table></figure>
</li>
</ol>
<pre><code>当你清空了一个master的hashslot时，redis cluster就会自动将其slave挂载到其他master上去，这个时候就只要删除掉master就可以了。
</code></pre><h4 id="Redis-Cluster的Slave自动迁移"><a href="#Redis-Cluster的Slave自动迁移" class="headerlink" title="Redis Cluster的Slave自动迁移"></a>Redis Cluster的Slave自动迁移</h4><p>比如现在有10个master，每个有1个对应的slave，然后现在新增了3个slave作为冗余，有的master就有2个slave了，出现了slave冗余。这个时候如果某个master的slave挂了，那么redis cluster会自动迁移一个冗余的slave给那个挂掉slave的master。</p>
<p>它可以避免这样一个场景：如果你每个master只有一个slave，万一某一个slave死了，然后很快，master也死了，那可用性就降低了。但是如果你给整个集群挂载了一些冗余slave，那么某个master的slave死了，冗余的slave会被自动迁移过去，作为master的新slave，此时即使那个master也死了，还是有一个slave会切换成master的。</p>
<p>上面的实验中有一个master是有冗余slave的，直接让其他master其中的一个slave死掉，然后看有冗余slave会不会自动挂载到那个master，<code>203:7005</code>的master，冗余了一个slave。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb check 192.168.2.201:7001</span><br><span class="line">&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7001)</span><br><span class="line">S: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">M: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   slots:1365-6825 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">S: 77aa78066b1a542e501bd9a0691f5f923529c482 192.168.2.203:7008</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 
cc8a78087798e148b257d2ae33815a25715109e8</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">   slots:6826,10923-16383 (5462 slots) master</span><br><span class="line">   2 additional replica(s)</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 192.168.2.201:7002</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:0-1364,6827-10922 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">[OK] All nodes agree about slots configuration.</span><br><span class="line">&gt;&gt;&gt; Check <span class="keyword">for</span> open slots...</span><br><span class="line">&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">[OK] All 16384 slots covered.</span><br></pre></td></tr></table></figure>
<p>现在把<code>203:7008</code>给kill掉，并删除pidfile，这样<code>202:7004</code>就没有slave了，看看Redis Cluster会做些什么。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br></pre></td><td class="code"><pre><span class="line">redis-trib.rb check 192.168.2.201:7001</span><br><span class="line">&gt;&gt;&gt; Performing Cluster Check (using node 192.168.2.201:7001)</span><br><span class="line">S: 158414bbcaa2cf0b9b30a81d2e31fb35ba5b4972 192.168.2.201:7001</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates cc8a78087798e148b257d2ae33815a25715109e8</span><br><span class="line">S: 19f6027db2837cc56dd581a3c826a687d096207a 192.168.2.203:7006</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43</span><br><span class="line">M: cc8a78087798e148b257d2ae33815a25715109e8 192.168.2.202:7004</span><br><span class="line">   slots:1365-6825 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">M: 5183cdee2295a07af3e98226887da2a645d979d1 192.168.2.203:7005</span><br><span class="line">   slots:6826,10923-16383 (5462 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">S: 8861dda48f95e748bc0e7df2757cdc723c897f28 
192.168.2.201:7002</span><br><span class="line">   slots: (0 slots) slave</span><br><span class="line">   replicates 5183cdee2295a07af3e98226887da2a645d979d1</span><br><span class="line">M: a7d09608d3669b0bff9152dc4c62fc2f8e5c2e43 192.168.2.202:7003</span><br><span class="line">   slots:0-1364,6827-10922 (5461 slots) master</span><br><span class="line">   1 additional replica(s)</span><br><span class="line">[OK] All nodes agree about slots configuration.</span><br><span class="line">&gt;&gt;&gt; Check <span class="keyword">for</span> open slots...</span><br><span class="line">&gt;&gt;&gt; Check slots coverage...</span><br><span class="line">[OK] All 16384 slots covered.</span><br></pre></td></tr></table></figure>
<p>可以看到，之前<code>203:7005</code>是有2个slave的，Redis Cluster在7008挂掉以后，自动将<code>201:7001</code>作为slave挂载到了<code>202:7004</code>下面。</p>
<h3 id="Redis-Cluster核心原理"><a href="#Redis-Cluster核心原理" class="headerlink" title="Redis Cluster核心原理"></a>Redis Cluster核心原理</h3><h4 id="节点间的内部通信机制"><a href="#节点间的内部通信机制" class="headerlink" title="节点间的内部通信机制"></a>节点间的内部通信机制</h4><h5 id="gossip协议"><a href="#gossip协议" class="headerlink" title="gossip协议"></a>gossip协议</h5><p>redis cluster节点间采取gossip协议进行通信，集群本身有很多元数据，比如hashslot和节点之间的映射，master和slave之间的关系，故障信息等等。</p>
<p>像集中式的存储，例如采用zookeeper集中式的维护和存储元数据。举个例子，集群元数据集中式存储的一个典型的代表，就是大数据领域里面的storm（分布式的大数据实时计算引擎），集中式的元数据存储架构底层是基于zookeeper（分布式协调中间件）的集群，这样来维护所有集群的元数据。</p>
<p>gossip跟集中式不同，不是将集群元数据（节点信息，故障，等等）集中存储在某个节点上，每个节点都持有一份元数据，互相之间不断通信，保持整个集群所有节点的数据是完整的。</p>
<p><code>集中式</code>：<strong>好处在于，元数据的更新和读取，时效性非常好，一旦元数据出现了变更，立即就更新到集中式的存储中，其他节点读取的时候立即就可以感知到; 不好在于，所有的元数据的更新压力全部集中在一个地方，可能会导致元数据的存储有压力。</strong></p>
<p><code>gossip</code>：<strong>好处在于元数据的更新比较分散，不是集中在一个地方，更新请求会陆陆续续发到所有节点上去更新，有一定的延时，降低了压力; 缺点，元数据更新有延时，可能导致集群的一些操作会有一些滞后。</strong></p>
<h5 id="10000端口"><a href="#10000端口" class="headerlink" title="10000端口"></a>10000端口</h5><p>每个节点都有一个专门用于节点间通信的端口，就是自己提供服务的端口号+10000，比如7001，那么用于节点间通信的就是17001端口。每个节点每隔一段时间都会往另外几个节点发送ping消息，同时其他节点接收到ping之后返回pong。</p>
<h5 id="交换的信息"><a href="#交换的信息" class="headerlink" title="交换的信息"></a>交换的信息</h5><p>节点之间相互交换信息包括故障信息，节点的增加和移除，hash slot信息，等等。</p>
<h4 id="gossip协议-1"><a href="#gossip协议-1" class="headerlink" title="gossip协议"></a>gossip协议</h4><p>gossip协议包含多种消息，包括ping，pong，meet，fail，等等。</p>
<ul>
<li><p>ping: 每个节点都会频繁给其他节点发送ping，其中包含自己的状态还有自己维护的集群元数据，互相通过ping交换元数据。</p>
</li>
<li><p>meet: 某个节点发送meet给新加入的节点，让新节点加入集群中，然后新节点就会开始与其他节点进行通信。</p>
<p>  <code>redis-trib.rb add-node</code>其实内部就是发送了一个gossip meet消息，给新加入的节点，通知那个节点去加入我们的集群。</p>
</li>
<li><p>pong: 作为对ping和meet的响应返回，包含自己的状态和其他信息，也可以用于信息广播和更新。</p>
</li>
<li>fail: 某个节点判断另一个节点fail之后，就发送fail给其他节点，通知其他节点，指定的节点宕机了。</li>
</ul>
<p>ping很频繁，而且要携带一些元数据，所以可能会加重网络负担，每个节点每秒会执行10次ping，每次会选择5个最久没有通信的其他节点。当然如果发现某个节点通信延时达到了<code>cluster_node_timeout / 2</code>，那么立即发送ping，避免数据交换延时过长，落后的时间太长了，比如说，两个节点之间都10分钟没有交换数据了，那么整个集群处于严重的元数据不一致的情况，就会有问题。<code>cluster_node_timeout</code>可以调整，如果值比较大，那么会降低发送的频率，每次ping的目的一个是带上自己节点的信息，还有就是带上1/10其他节点的信息一起发送出去，跟其他节点进行数据交换。每次至少发给3个其他节点，最多发送总节点-2个其他节点。</p>
<h3 id="面向集群的jedis内部实现原理"><a href="#面向集群的jedis内部实现原理" class="headerlink" title="面向集群的jedis内部实现原理"></a>面向集群的jedis内部实现原理</h3><p>jedis cluster api与redis cluster集群交互的一些基本原理。</p>
<h4 id="基于重定向的客户端"><a href="#基于重定向的客户端" class="headerlink" title="基于重定向的客户端"></a>基于重定向的客户端</h4><ol>
<li><p>请求重定向</p>
<p> 客户端可能会挑选任意一个redis实例去发送命令，每个redis实例接收到命令，都会计算key对应的hash slot，如果在本地就在本地处理，否则返回moved给客户端，让客户端进行重定向。<code>cluster keyslot mykey</code>，可以查看一个key对应的hash slot是什么。用redis-cli的时候，可以加入-c参数，支持自动的请求重定向，redis-cli接收到moved之后，会自动重定向到对应的节点执行命令。</p>
</li>
<li><p>计算hash slot</p>
<p> 计算hash slot的算法，就是根据key计算CRC16值，然后对16384取模，拿到对应的hash slot。用hash tag可以手动指定key对应的slot，同一个hash tag下的key，都会在一个hash slot中，比如<code>set mykey1:{100}</code>和<code>set mykey2:{100}</code>。</p>
</li>
<li><p>hash slot查找</p>
<p> 节点间通过gossip协议进行数据交换，这样就知道每个hash slot在哪个节点上。</p>
</li>
</ol>
<h4 id="smart-jedis"><a href="#smart-jedis" class="headerlink" title="smart jedis"></a>smart jedis</h4><ol>
<li><p>什么是smart jedis</p>
<p> <strong>基于重定向的客户端，很消耗网络IO，因为大部分情况下，可能都会出现一次请求重定向，才能找到正确的节点。</strong>所以大部分的客户端，比如java redis客户端，就是jedis，都是smart的，本地维护一份hashslot -&gt; node的映射表，大部分情况下，直接走本地缓存就可以找到hashslot -&gt; node，不需要通过节点进行moved重定向。</p>
</li>
<li><p>JedisCluster的工作原理</p>
<p> 在JedisCluster初始化的时候，就会随机选择一个node，初始化hashslot -&gt; node映射表，同时为每个节点创建一个JedisPool连接池。每次基于JedisCluster执行操作，首先JedisCluster都会在本地计算key的hashslot，然后在本地映射表找到对应的节点。如果那个node正好还是持有那个hashslot，那么就ok; 如果进行了reshard这样的操作，可能hashslot已经不在那个node上了，就会返回moved，那么利用该节点的元数据，更新本地的hashslot -&gt; node映射表缓存。重复上面几个步骤，直到找到对应的节点，如果重试超过5次，那么就报错，抛出JedisClusterMaxRedirectionsException。jedis老版本，可能会出现在集群某个节点故障还没完成自动切换恢复时，频繁更新hash slot，频繁ping节点检查活跃，导致大量网络IO开销，jedis最新版本，对于这些过度的hash slot更新和ping，都进行了优化，避免了类似问题。</p>
</li>
<li><p>hashslot迁移和ask重定向</p>
<p> 如果hash slot正在迁移，那么会返回ask重定向给jedis，jedis接收到ask重定向之后，会重新定位到目标节点去执行，但是因为ask发生在hash slot迁移过程中，所以JedisCluster API收到ask是不会更新hashslot本地缓存。如果确定hashslot已经迁移完了，moved还是会更新本地hashslot-&gt;node映射表缓存的。</p>
</li>
</ol>
<h3 id="Redis-Cluster高可用性与主备切换原理"><a href="#Redis-Cluster高可用性与主备切换原理" class="headerlink" title="Redis Cluster高可用性与主备切换原理"></a>Redis Cluster高可用性与主备切换原理</h3><p>redis cluster的高可用的原理，几乎跟哨兵是类似的</p>
<ol>
<li><p>判断节点宕机</p>
<p> 如果一个节点认为另外一个节点宕机，那么就是pfail，主观宕机。</p>
<p> 如果多个节点都认为另外一个节点宕机了，那么就是fail，客观宕机，跟哨兵的原理几乎一样，sdown，odown。</p>
<p> 在<code>cluster-node-timeout</code>内，某个节点一直没有返回pong，那么就被认为pfail。</p>
<p> 如果一个节点认为某个节点pfail了，那么会在gossip ping消息中，ping给其他节点，如果超过半数的节点都认为pfail了，那么就会变成fail。</p>
</li>
<li><p>从节点过滤</p>
<p> 对宕机的master node，从其所有的slave node中，选择一个切换成master node。检查每个slave node与master node断开连接的时间，如果超过了<code>cluster-node-timeout * cluster-slave-validity-factor</code>，那么就没有资格切换成master，这个从节点超时过滤的步骤也是跟哨兵一样的。</p>
</li>
<li><p>从节点选举</p>
<blockquote>
<p>哨兵：对所有从节点进行排序，先排slave priority，然后offset，最后是run id</p>
</blockquote>
<p> 每个从节点，都根据自己对master复制数据的offset，来设置一个选举时间，offset越大（复制数据越多）的从节点，选举时间越靠前，优先进行选举</p>
<p> 所有的master node开始slave选举投票，给要进行选举的slave进行投票，如果大部分master node（N/2 + 1）都投票给了某个从节点，那么选举通过，那个从节点可以切换成master</p>
<p> 从节点执行主备切换，从节点切换为主节点</p>
</li>
<li><p>与哨兵比较</p>
<p> 整个流程跟哨兵相比，非常类似，所以说redis cluster功能强大，直接集成了replication和sentinel的功能。</p>
</li>
</ol>
<h2 id="Redis在实践中的常见问题以及优化思路"><a href="#Redis在实践中的常见问题以及优化思路" class="headerlink" title="Redis在实践中的常见问题以及优化思路"></a>Redis在实践中的常见问题以及优化思路</h2><h3 id="fork耗时导致高并发请求延时"><a href="#fork耗时导致高并发请求延时" class="headerlink" title="fork耗时导致高并发请求延时"></a>fork耗时导致高并发请求延时</h3><p>RDB和AOF的时候，其实会有生成RDB快照，AOF rewrite，耗费磁盘IO的过程，主进程fork子进程。fork的时候，子进程是需要拷贝父进程的空间内存页表的，也是会耗费一定的时间的，一般来说，如果父进程内存有1个G的数据，那么fork可能会耗费在20ms左右，如果是10G~30G，那么就会耗费20 * 10，甚至20 * 30，也就是几百毫秒的时间。</p>
<p><code>info stats</code>中的latest_fork_usec，可以看到最近一次fork的时长。redis单机QPS一般在几万，fork可能一下子就会拖慢几万条操作的请求时长，从几毫秒变成1秒。</p>
<p>优化思路：</p>
<p>fork耗时跟redis主进程的内存有关系，一般控制redis的内存在10GB以内，slave -&gt; master，全量复制很耗时。</p>
<h3 id="AOF的阻塞问题"><a href="#AOF的阻塞问题" class="headerlink" title="AOF的阻塞问题"></a>AOF的阻塞问题</h3><p>redis将数据写入AOF缓冲区，单独开一个线程做fsync操作，每秒一次。但是redis主线程会检查两次fsync的时间，如果距离上次fsync时间超过了2秒，那么<strong>数据写请求</strong>就会阻塞。everysec，最多丢失2秒的数据，一旦fsync超过2秒的延时，整个redis就被拖慢。</p>
<p>优化思路：</p>
<p>优化硬盘写入速度，建议采用SSD，不要用普通的机械硬盘，SSD，大幅度提升磁盘读写的速度。</p>
<h3 id="主从复制延迟问题"><a href="#主从复制延迟问题" class="headerlink" title="主从复制延迟问题"></a>主从复制延迟问题</h3><p>主从复制可能会延迟严重，这个时候需要良好的监控和报警机制。在<code>info replication</code>中，可以看到master和slave复制的offset，做一个差值就可以看到对应的延迟量，如果延迟过多，那么就进行报警。这个问题主要是做好监控。</p>
<h3 id="主从复制风暴问题"><a href="#主从复制风暴问题" class="headerlink" title="主从复制风暴问题"></a>主从复制风暴问题</h3><p>如果一下子让多个slave从master去执行全量复制，一份大的rdb同时发送到多个slave，会导致网络带宽被严重占用。如果一个master真的要挂载多个slave，那尽量用树状结构，不要用星型结构。</p>
<h3 id="vm-overcommit-memory"><a href="#vm-overcommit-memory" class="headerlink" title="vm.overcommit_memory"></a>vm.overcommit_memory</h3><p>0: 检查有没有足够内存，没有的话申请内存失败<br>1: 允许使用内存直到用完为止<br>2: 内存地址空间不能超过swap + 50%</p>
<p>如果是0的话，可能导致类似fork等操作执行失败，申请不到足够的内存空间</p>
<pre><code>cat /proc/sys/vm/overcommit_memory
echo &quot;vm.overcommit_memory=1&quot; &gt;&gt; /etc/sysctl.conf
sysctl vm.overcommit_memory=1
</code></pre><h3 id="swapiness"><a href="#swapiness" class="headerlink" title="swappiness"></a>swappiness</h3><pre><code>cat /proc/version，查看linux内核版本
</code></pre><p>如果linux内核版本&lt;3.5，那么swappiness设置为0，这样系统宁愿swap也不会oom killer（杀掉进程）<br>如果linux内核版本&gt;=3.5，那么swappiness设置为1，这样系统宁愿swap也不会oom killer</p>
<p>这样可以保证redis不会被杀掉</p>
<pre><code>echo 0 &gt; /proc/sys/vm/swappiness
echo vm.swappiness=0 &gt;&gt; /etc/sysctl.conf
</code></pre><h3 id="最大打开文件句柄"><a href="#最大打开文件句柄" class="headerlink" title="最大打开文件句柄"></a>最大打开文件句柄</h3><pre><code>ulimit -n 10032
</code></pre><p>去上网搜一下，不同的操作系统，版本，设置的方式都不太一样</p>
<h3 id="tcp-backlog"><a href="#tcp-backlog" class="headerlink" title="tcp backlog"></a>tcp backlog</h3><pre><code>cat /proc/sys/net/core/somaxconn
echo 511 &gt; /proc/sys/net/core/somaxconn
</code></pre><h2 id="Redis总结"><a href="#Redis总结" class="headerlink" title="Redis总结"></a>Redis总结</h2><p>如果你的数据量不大，单master就可以容纳，一般来说你的缓存的总量在10G以内就可以，那么建议按照以下架构去部署redis。</p>
<p>redis持久化+备份方案+容灾方案+replication（主从+读写分离）+sentinel（哨兵集群，3个节点，高可用性），可以支撑的数据量在10G以内，可以支撑的写QPS在几万左右，可以支撑的读QPS可以达到10万以上（随你的需求，水平扩容slave节点就可以），可用性在99.99%。</p>
<p>如果你的数据量很大，比如（国内排名前三的大电商网站，x宝，x东，x宁易购），数据量是很大的，redis cluster多master分布式存储数据，可以水平扩容。如果要支撑更多的数据量，1T+以上没问题，只要扩容master即可，读写QPS分别都达到几十万都没问题，只要扩容master，redis cluster对读写分离支持不太好，需要执行<code>readonly</code>才能去slave上读。</p>
<p>Redis Cluster支撑99.99%可用性也没问题，slave -&gt; master的主备切换，冗余slave去进一步提升可用性的方案（每个master挂一个slave，但是整个集群再加3个slave冗余一下）。</p>

      
    </div>

    

    
    
    

    

    
      <div>
        <div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
  <div>坚持原创技术分享，您的支持将鼓励我继续创作！</div>
  <button id="rewardButton" type="button" onclick="var qr = document.getElementById('QR'); if (qr.style.display === 'none') {qr.style.display='block';} else {qr.style.display='none'}">
    <span>打赏</span>
  </button>
  <div id="QR" style="display: none;">

    
      <div id="wechat" style="display: inline-block">
        <img id="wechat_qr" src="/img/donate/wechatpay.jpeg" alt="杨帆 微信支付"/>
        <p>微信支付</p>
      </div>
    

    
      <div id="alipay" style="display: inline-block">
        <img id="alipay_qr" src="/img/donate/alipay.jpeg" alt="杨帆 支付宝"/>
        <p>支付宝</p>
      </div>
    

    

  </div>
</div>

      </div>
    

    

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/redis/" rel="tag"># redis</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2018/02/12/cache01/" rel="next" title="高可用缓存架构实战1-Redis配置和持久化">
                <i class="fa fa-chevron-left"></i> 高可用缓存架构实战1-Redis配置和持久化
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2018/02/12/cache02/" rel="prev" title="高可用缓存架构实战2-Redis企业级应用实战">
                高可用缓存架构实战2-Redis企业级应用实战 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          

  
    <div class="comments" id="comments">
      <div id="lv-container" data-id="city" data-uid="MTAyMC8yOTk2Ni82NTMx"></div>
    </div>

  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview-wrap">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview-wrap sidebar-panel">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image"
                src="/img/photo/bug.png"
                alt="杨帆" />
            
              <p class="site-author-name" itemprop="name">杨帆</p>
              <p class="site-description motion-element" itemprop="description">记录工作和学习中遇到的问题</p>
          </div>

          
            <nav class="site-state motion-element">
              <!-- Site counters: posts, categories and tags, each linking to its index page. -->
              <div class="site-state-item site-state-posts">
                <a href="/archives/">
                  <span class="site-state-item-count">107</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>

              <div class="site-state-item site-state-categories">
                <a href="/categories/index.html">
                  <span class="site-state-item-count">18</span>
                  <span class="site-state-item-name">分类</span>
                </a>
              </div>

              <div class="site-state-item site-state-tags">
                <a href="/tags/index.html">
                  <span class="site-state-item-count">38</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            </nav>
          

          
            <div class="feed-link motion-element">
              <a href="/atom.xml" rel="alternate">
                <i class="fa fa-rss"></i>
                RSS
              </a>
            </div>
          

          
            <!-- Author's external profiles. They open in a new tab, so rel="noopener"
                 keeps the opened page from reaching back through window.opener. -->
            <div class="links-of-author motion-element">
              
                <span class="links-of-author-item">
                  <a href="https://github.com/sail-y" target="_blank" rel="noopener" title="GitHub"><i class="fa fa-fw fa-github"></i>GitHub</a>
                  
                </span>
              
                <span class="links-of-author-item">
                  <a href="https://weibo.com/338632221" target="_blank" rel="noopener" title="微博"><i class="fa fa-fw fa-globe"></i>微博</a>
                  
                </span>
              
            </div>
          

          
          

          
          

          
            
          
          

        </div>
      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#Redis高可用集群实战"><span class="nav-number">1.</span> <span class="nav-text">Redis高可用集群实战</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#如何做到99-99-高可用性"><span class="nav-number">1.1.</span> <span class="nav-text">如何做到99.99%高可用性</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Redis哨兵架构介绍"><span class="nav-number">1.2.</span> <span class="nav-text">Redis哨兵架构介绍</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#哨兵的核心知识"><span class="nav-number">1.2.1.</span> <span class="nav-text">哨兵的核心知识</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#为什么redis哨兵集群只有2个节点无法正常工作？"><span class="nav-number">1.2.1.1.</span> <span class="nav-text">为什么redis哨兵集群只有2个节点无法正常工作？</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#经典的3节点哨兵集群"><span class="nav-number">1.2.1.2.</span> <span class="nav-text">经典的3节点哨兵集群</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#数据丢失问题"><span class="nav-number">1.2.2.</span> <span class="nav-text">数据丢失问题</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#解决异步复制和脑裂导致的数据丢失"><span class="nav-number">1.2.2.1.</span> <span class="nav-text">解决异步复制和脑裂导致的数据丢失</span></a></li></ol></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#哨兵原理详解"><span class="nav-number">1.3.</span> <span class="nav-text">哨兵原理详解</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#sdown和odown转换机制"><span class="nav-number">1.3.1.</span> <span class="nav-text">sdown和odown转换机制</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#哨兵集群的自动发现机制"><span class="nav-number">1.3.2.</span> <span class="nav-text">哨兵集群的自动发现机制</span></a></li><li class="nav-item 
nav-level-3"><a class="nav-link" href="#slave配置的自动纠正"><span class="nav-number">1.3.3.</span> <span class="nav-text">slave配置的自动纠正</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#slave-gt-master选举算法"><span class="nav-number">1.3.4.</span> <span class="nav-text">slave-&gt;master选举算法</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#quorum和majority"><span class="nav-number">1.3.5.</span> <span class="nav-text">quorum和majority</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#configuration-epoch"><span class="nav-number">1.3.6.</span> <span class="nav-text">configuration epoch</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#configuraiton传播"><span class="nav-number">1.3.7.</span> <span class="nav-text">configuraiton传播</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#哨兵集群实战"><span class="nav-number">1.4.</span> <span class="nav-text">哨兵集群实战</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#哨兵的配置文件"><span class="nav-number">1.4.1.</span> <span class="nav-text">哨兵的配置文件</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#在eshop-cache03上再部署一个Redis"><span class="nav-number">1.4.2.</span> <span class="nav-text">在eshop-cache03上再部署一个Redis</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#配置哨兵"><span class="nav-number">1.4.3.</span> <span class="nav-text">配置哨兵</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#启动哨兵进程"><span class="nav-number">1.4.4.</span> <span class="nav-text">启动哨兵进程</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#检查哨兵状态"><span class="nav-number">1.4.5.</span> <span class="nav-text">检查哨兵状态</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#哨兵管理和容灾演练"><span class="nav-number">1.5.</span> <span class="nav-text">哨兵管理和容灾演练</span></a><ol 
class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#哨兵节点的增加和删除"><span class="nav-number">1.5.1.</span> <span class="nav-text">哨兵节点的增加和删除</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#slave的永久下线"><span class="nav-number">1.5.2.</span> <span class="nav-text">slave的永久下线</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#slave切换为Master的优先级"><span class="nav-number">1.5.3.</span> <span class="nav-text">slave切换为Master的优先级</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#基于哨兵集群架构下的安全认证"><span class="nav-number">1.5.4.</span> <span class="nav-text">基于哨兵集群架构下的安全认证</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#容灾演练"><span class="nav-number">1.5.5.</span> <span class="nav-text">容灾演练</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#哨兵的生产环境部署"><span class="nav-number">1.5.6.</span> <span class="nav-text">哨兵的生产环境部署</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#如何让Redis支持1T以上大数据"><span class="nav-number">1.6.</span> <span class="nav-text">如何让Redis支持1T以上大数据</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#单Master的redis在海量数据面前的瓶颈"><span class="nav-number">1.6.1.</span> <span class="nav-text">单Master的redis在海量数据面前的瓶颈</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#怎么才能够突破单机瓶颈，让redis支撑海量数据？"><span class="nav-number">1.6.2.</span> <span class="nav-text">怎么才能够突破单机瓶颈，让redis支撑海量数据？</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Redis集群架构"><span class="nav-number">1.6.3.</span> <span class="nav-text">Redis集群架构</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#redis-cluster-vs-replication-sentinel"><span class="nav-number">1.6.4.</span> <span class="nav-text">redis cluster vs. 
replication + sentinel</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#分布式数据存储的核心算法"><span class="nav-number">1.6.5.</span> <span class="nav-text">分布式数据存储的核心算法</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Redis-Cluster介绍"><span class="nav-number">1.7.</span> <span class="nav-text">Redis Cluster介绍</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#Redis-Cluster实战部署"><span class="nav-number">1.7.1.</span> <span class="nav-text">Redis Cluster实战部署</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#Redis-Cluster的重要配置"><span class="nav-number">1.7.1.1.</span> <span class="nav-text">Redis Cluster的重要配置</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#在三台机器上启动6个redis实例"><span class="nav-number">1.7.1.2.</span> <span class="nav-text">在三台机器上启动6个redis实例</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#准备生产环境的启动脚本"><span class="nav-number">1.7.1.3.</span> <span class="nav-text">准备生产环境的启动脚本</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#创建Redis集群"><span class="nav-number">1.7.1.4.</span> <span class="nav-text">创建Redis集群</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Redis-Cluster测试"><span class="nav-number">1.7.1.5.</span> <span class="nav-text">Redis Cluster测试</span></a><ol class="nav-child"><li class="nav-item nav-level-5"><a class="nav-link" href="#实验多master写入-gt-海量数据的分布式存储"><span class="nav-number">1.7.1.5.1.</span> <span class="nav-text">实验多master写入 -&gt; 海量数据的分布式存储</span></a></li><li class="nav-item nav-level-5"><a class="nav-link" href="#实验不同master各自的slave读取-gt-读写分离"><span class="nav-number">1.7.1.5.2.</span> <span class="nav-text">实验不同master各自的slave读取 -&gt; 读写分离</span></a></li><li class="nav-item nav-level-5"><a class="nav-link" href="#实验自动故障切换-gt-高可用性"><span class="nav-number">1.7.1.5.3.</span> <span 
class="nav-text">实验自动故障切换 -&gt; 高可用性</span></a></li></ol></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Redis-Cluster水平扩容"><span class="nav-number">1.7.1.6.</span> <span class="nav-text">Redis Cluster水平扩容</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Redis-Cluster的Slave自动迁移"><span class="nav-number">1.7.1.7.</span> <span class="nav-text">Redis Cluster的Slave自动迁移</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Redis-Cluster核心原理"><span class="nav-number">1.7.2.</span> <span class="nav-text">Redis Cluster核心原理</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#节点间的内部通信机制"><span class="nav-number">1.7.2.1.</span> <span class="nav-text">节点间的内部通信机制</span></a><ol class="nav-child"><li class="nav-item nav-level-5"><a class="nav-link" href="#gossip协议"><span class="nav-number">1.7.2.1.1.</span> <span class="nav-text">gossip协议</span></a></li><li class="nav-item nav-level-5"><a class="nav-link" href="#10000端口"><span class="nav-number">1.7.2.1.2.</span> <span class="nav-text">10000端口</span></a></li><li class="nav-item nav-level-5"><a class="nav-link" href="#交换的信息"><span class="nav-number">1.7.2.1.3.</span> <span class="nav-text">交换的信息</span></a></li></ol></li><li class="nav-item nav-level-4"><a class="nav-link" href="#gossip协议-1"><span class="nav-number">1.7.2.2.</span> <span class="nav-text">gossip协议</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#面向集群的jedis内部实现原理"><span class="nav-number">1.7.3.</span> <span class="nav-text">面向集群的jedis内部实现原理</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#基于重定向的客户端"><span class="nav-number">1.7.3.1.</span> <span class="nav-text">基于重定向的客户端</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#smart-jedis"><span class="nav-number">1.7.3.2.</span> <span class="nav-text">smart jedis</span></a></li></ol></li><li 
class="nav-item nav-level-3"><a class="nav-link" href="#Redis-Cluster高可用性与主备切换原理"><span class="nav-number">1.7.4.</span> <span class="nav-text">Redis Cluster高可用性与主备切换原理</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Redis在实践中的常见问题以及优化思路"><span class="nav-number">1.8.</span> <span class="nav-text">Redis在实践中的常见问题以及优化思路</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#fork耗时导致高并发请求延时"><span class="nav-number">1.8.1.</span> <span class="nav-text">fork耗时导致高并发请求延时</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#AOF的阻塞问题"><span class="nav-number">1.8.2.</span> <span class="nav-text">AOF的阻塞问题</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#主从复制延迟问题"><span class="nav-number">1.8.3.</span> <span class="nav-text">主从复制延迟问题</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#主从复制风暴问题"><span class="nav-number">1.8.4.</span> <span class="nav-text">主从复制风暴问题</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#vm-overcommit-memory"><span class="nav-number">1.8.5.</span> <span class="nav-text">vm.overcommit_memory</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#swapiness"><span class="nav-number">1.8.6.</span> <span class="nav-text">swapiness</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#最大打开文件句柄"><span class="nav-number">1.8.7.</span> <span class="nav-text">最大打开文件句柄</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#tcp-backlog"><span class="nav-number">1.8.8.</span> <span class="nav-text">tcp backlog</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Redis总结"><span class="nav-number">1.9.</span> <span class="nav-text">Redis总结</span></a></li></ol></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; 2015 &mdash; <span itemprop="copyrightYear">2018</span>
  <span class="with-love" id="animate">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">杨帆</span>

  

  
</div>




  <div class="powered-by">由 <a class="theme-link" target="_blank" rel="noopener" href="https://hexo.io">Hexo</a> 强力驱动 v3.7.1</div>



  <span class="post-meta-divider">|</span>



  <div class="theme-info">主题 &mdash; <a class="theme-link" target="_blank" rel="noopener" href="https://github.com/theme-next/hexo-theme-next">NexT.Gemini</a> v6.1.0</div>




        
<div class="busuanzi-count">
  <!-- Busuanzi visitor counter. The script was previously loaded from
       dn-lbstatics.qbox.me, a host the Busuanzi project has retired; it is now
       served from busuanzi.ibruce.info. The script is expected to fill the
       #busuanzi_value_* spans below. -->
  <script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>

  
    <span class="site-uv" title="总访客量">
      <i class="fa fa-user"></i>
      <span class="busuanzi-value" id="busuanzi_value_site_uv"></span>
    </span>
  

  
    <span class="site-pv" title="总访问量">
      <i class="fa fa-eye"></i>
      <span class="busuanzi-value" id="busuanzi_value_site_pv"></span>
    </span>
  
</div>









        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // If window.Promise is anything other than a genuine function, null it out
  // so later scripts see "no Promise" instead of a non-callable value.
  (function () {
    var promiseTag = Object.prototype.toString.call(window.Promise);
    var looksCallable = promiseTag === '[object Function]';
    if (!looksCallable) {
      window.Promise = null;
    }
  })();
</script>














  



  
  











  
  
    <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/lib/canvas-nest/canvas-nest.min.js"></script>
  

  
  
    <script type="text/javascript" src="/lib/three/three.min.js"></script>
  

  
  
    <script type="text/javascript" src="/lib/three/three-waves.min.js"></script>
  


  


  <script type="text/javascript" src="/js/src/utils.js?v=6.1.0"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=6.1.0"></script>



  
  


  <script type="text/javascript" src="/js/src/affix.js?v=6.1.0"></script>

  <script type="text/javascript" src="/js/src/schemes/pisces.js?v=6.1.0"></script>



  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=6.1.0"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=6.1.0"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=6.1.0"></script>



  



	





  





  
    <script type="text/javascript">
      // Inject the LiveRe embed script ahead of the first <script> element on
      // the page, unless LivereTower is already defined as a function.
      (function (doc, tagName) {
        if (typeof LivereTower === 'function') {
          return;
        }
        var firstScript = doc.getElementsByTagName(tagName)[0];
        var loader = doc.createElement(tagName);
        loader.async = true;
        loader.src = 'https://cdn-city.livere.com/js/embed.dist.js';
        firstScript.parentNode.insertBefore(loader, firstScript);
      })(document, 'script');
    </script>
  










  

  <script type="text/javascript">
    // Shared state for the local-search popup.
    // isfetched flips to true once searchFunc has loaded the index.
    var isfetched = false;
    // Location of the search index, relative to the site root.
    var search_path = "search.xml";
    if (search_path.length === 0) {
      search_path = "search.xml";
    }
    // Any index whose name does not end in "json" is parsed as XML.
    var isXml = !/json$/i.test(search_path);
    var path = "/" + search_path;
    // Popup handlers follow.

    // Dismiss the search popup: clear the query, hide the popup, drop the
    // rendered results and overlay, and restore page scrolling.
    var onPopupClose = function (e) {
      $('#local-search-input').val('');
      $('.popup').hide();
      $('.search-result-list, #no-result, .local-search-pop-overlay').remove();
      $('body').css('overflow', '');
    };

    // Show the search popup over a click-to-dismiss overlay and focus the
    // query input, with mobile auto-capitalise / auto-correct switched off.
    function proceedsearch() {
      var $page = $("body");
      $page.append('<div class="search-popup-overlay local-search-pop-overlay"></div>');
      $page.css('overflow', 'hidden');

      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();

      $('#local-search-input')
        .attr({ autocapitalize: "none", autocorrect: "off" })
        .focus();
    }

    // search function;
    // Fetches the search index at `path` (parsed as XML or JSON according to the
    // global `isXml`), binds the input with id `search_id` so that matches are
    // rendered into the element with id `content_id`, then opens the popup via
    // proceedsearch().
    //
    //   path       - URL of the search index
    //   search_id  - id of the query <input>
    //   content_id - id of the element that receives the rendered results
    //
    // On a failed request the loading overlay is removed and page scrolling is
    // restored; `isfetched` stays false, so the next click on the trigger retries.
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // Every non-overlapping occurrence of `word` in `text`, as
      // {position, word} records in ascending position order. An empty word
      // yields no hits. Unless `caseSensitive` is truthy both strings are
      // lower-cased first, so the recorded `word` is the lower-cased form.
      function getIndexByWord(word, text, caseSensitive) {
        var wordLen = word.length;
        if (wordLen === 0) {
          return [];
        }
        var startPosition = 0, position = -1, index = [];
        if (!caseSensitive) {
          text = text.toLowerCase();
          word = word.toLowerCase();
        }
        while ((position = text.indexOf(word, startPosition)) > -1) {
          index.push({position: position, word: word});
          startPosition = position + wordLen;
        }
        return index;
      }

      // Returns text[slice.start, slice.end) with each hit in `slice.hits`
      // wrapped in <b class="search-keyword">.
      function highlightKeyword(text, slice) {
        var result = '';
        var prevEnd = slice.start;
        slice.hits.forEach(function (hit) {
          result += text.substring(prevEnd, hit.position);
          var end = hit.position + hit.length;
          result += '<b class="search-keyword">' + text.substring(hit.position, end) + '</b>';
          prevEnd = end;
        });
        result += text.substring(prevEnd, slice.end);
        return result;
      }

      // Take down the loading overlay and give the page its scrolling back.
      function removeLoadingOverlay() {
        $(".local-search-pop-overlay").remove();
        $('body').css('overflow', '');
      }

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          // XML entries are mapped to {title, content, url}; a JSON index is
          // used as-is (presumably an array of the same shape -- confirm
          // against the index generator).
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);

          // Re-runs the search for the current input value and re-renders
          // the result list.
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            // With several keywords, the whole phrase is searched as well.
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                // strip markup so only text is searched and shown
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];

                // Consumes hits from the tail of `index` (sorted so the
                // smallest position is last) that end at or before `end`,
                // dropping hits that overlap one already taken. Returns the
                // slice descriptor and adds the number of whole-phrase hits to
                // the per-article `searchTextCount`. Declared at function level
                // rather than inside the `if (isMatch)` block, because a
                // function declaration inside a block is not valid ES5
                // strict-mode syntax.
                function mergeIntoSlice(text, start, end, index) {
                  var item = index[index.length - 1];
                  var position = item.position;
                  var word = item.word;
                  var hits = [];
                  var searchTextCountInSlice = 0;
                  while (position + word.length <= end && index.length != 0) {
                    if (word === searchText) {
                      searchTextCountInSlice++;
                    }
                    hits.push({position: position, length: word.length});
                    var wordEnd = position + word.length;

                    // move to next position of hit
                    index.pop();
                    while (index.length != 0) {
                      item = index[index.length - 1];
                      position = item.position;
                      word = item.word;
                      if (wordEnd > position) {
                        index.pop();
                      } else {
                        break;
                      }
                    }
                  }
                  searchTextCount += searchTextCountInSlice;
                  return {
                    hits: hits,
                    start: start,
                    end: end,
                    searchTextCount: searchTextCountInSlice
                  };
                }

                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results
                if (isMatch) {
                  // sort index by position of keyword: descending position, so
                  // the earliest hit sits at the end of the array and can be
                  // popped first; at equal position the longer word ends up last
                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices
                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(title, 0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if(start < 0){
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if(end > content.length){
                      end = content.length;
                    }
                    slicesOfContent.push(mergeIntoSlice(content, start, end, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count
                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content
                  var upperBound = parseInt('1');
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content
                  var resultItem = '';

                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0]) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + title + "</a>";
                  }

                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              });
            }
            if (keywords.length === 1 && keywords[0] === "") {
              // empty query: show the idle placeholder
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x" /></div>'
            } else if (resultItems.length === 0) {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x" /></div>'
            } else {
              // rank: whole-phrase hits first, then total hits, then the
              // article that was indexed later
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              });
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          };

          // Both operands are literals here, so the first branch (search on
          // every input event) is the one that runs; the other searches on
          // icon click or Enter.
          if ('auto' === 'auto') {
            input.addEventListener('input', inputEventFunction);
          } else {
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          removeLoadingOverlay();

          proceedsearch();
        },
        error: function() {
          // Without this handler a failed fetch would leave the spinner
          // overlay on screen and the page scroll locked, with nothing bound
          // to dismiss it.
          removeLoadingOverlay();
        }
      });
    };

    // Popup wiring: the trigger opens the popup (loading the index on first
    // use), the close button and the Escape key dismiss it, and clicks inside
    // the popup are kept from bubbling further.
    $('.popup-trigger').on('click', function (evt) {
      evt.stopPropagation();
      if (isfetched !== false) {
        proceedsearch();
        return;
      }
      searchFunc(path, 'local-search-input', 'local-search-result');
    });

    $('.popup-btn-close').on('click', onPopupClose);

    $('.popup').on('click', function (evt) {
      evt.stopPropagation();
    });

    $(document).on('keyup', function (evt) {
      // 27 = Escape
      if (evt.which !== 27) {
        return;
      }
      if ($('.search-popup').is(':visible')) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  

  
  

  

  

  

  

</body>
</html>
